# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import sys
import threading
import urllib
import urlparse

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer


class WebserverNotAvailable(Exception):
    """Exception type exported by this module.

    Nothing in the visible part of this module raises it.
    """
    # NOTE(review): the class body was missing from the extract this was
    # restored from; an empty body is assumed -- confirm.
    pass


class BadWebserverPath(ValueError):
    """Raised when a path cannot be mapped below the served directory.

    Instances are built with two arguments: the offending path and the
    directory it was expected to live in.
    """

    # NOTE(review): the ``def`` line was missing from the extract this was
    # restored from; ``__str__`` is assumed because the body only formats
    # ``self.args`` into a message -- confirm.
    def __str__(self):
        """Return 'path <path> is not in <directory>'."""
        return 'path %s is not in %s' % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
45
"""Handles one request.
47
A TestingHTTPRequestHandler is instantiated for every request
48
received by the associated server.
51
def log_message(self, format, *args):
52
tcs = self.server.test_case_server
53
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
54
self.address_string(),
55
self.log_date_time_string(),
57
self.headers.get('referer', '-'),
58
self.headers.get('user-agent', '-'))
60
def handle_one_request(self):
61
"""Handle a single HTTP request.
63
We catch all socket errors occurring when the client close the
64
connection early to avoid polluting the test results.
67
SimpleHTTPRequestHandler.handle_one_request(self)
68
except socket.error, e:
70
and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
71
errno.ECONNABORTED,)):
72
self.close_connection = 1
77
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
78
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
80
def parse_ranges(self, ranges_header):
81
"""Parse the range header value and returns ranges and tail.
83
RFC2616 14.35 says that syntactically invalid range
84
specifiers MUST be ignored. In that case, we return 0 for
85
tail and [] for ranges.
89
if not ranges_header.startswith('bytes='):
90
# Syntactically invalid header
93
ranges_header = ranges_header[len('bytes='):]
94
for range_str in ranges_header.split(','):
95
# FIXME: RFC2616 says end is optional and default to file_size
96
range_match = self._range_regexp.match(range_str)
97
if range_match is not None:
98
start = int(range_match.group('start'))
99
end = int(range_match.group('end'))
101
# Syntactically invalid range
103
ranges.append((start, end))
105
tail_match = self._tail_regexp.match(range_str)
106
if tail_match is not None:
107
tail = int(tail_match.group('tail'))
109
# Syntactically invalid range
113
def send_range_content(self, file, start, length):
115
self.wfile.write(file.read(length))
117
def get_single_range(self, file, file_size, start, end):
118
self.send_response(206)
119
length = end - start + 1
120
self.send_header('Accept-Ranges', 'bytes')
121
self.send_header("Content-Length", "%d" % length)
123
self.send_header("Content-Type", 'application/octet-stream')
124
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
128
self.send_range_content(file, start, length)
130
def get_multiple_ranges(self, file, file_size, ranges):
131
self.send_response(206)
132
self.send_header('Accept-Ranges', 'bytes')
133
boundary = "%d" % random.randint(0,0x7FFFFFFF)
134
self.send_header("Content-Type",
135
"multipart/byteranges; boundary=%s" % boundary)
137
for (start, end) in ranges:
138
self.wfile.write("--%s\r\n" % boundary)
139
self.send_header("Content-type", 'application/octet-stream')
140
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
144
self.send_range_content(file, start, end - start + 1)
146
self.wfile.write("--%s\r\n" % boundary)
149
"""Serve a GET request.
151
Handles the Range header.
154
self.server.test_case_server.GET_request_nb += 1
156
path = self.translate_path(self.path)
157
ranges_header_value = self.headers.get('Range')
158
if ranges_header_value is None or os.path.isdir(path):
159
# Let the mother class handle most cases
160
return SimpleHTTPRequestHandler.do_GET(self)
163
# Always read in binary mode. Opening files in text
164
# mode may cause newline translations, making the
165
# actual size of the content transmitted *less* than
166
# the content-length!
167
file = open(path, 'rb')
169
self.send_error(404, "File not found")
172
file_size = os.fstat(file.fileno())[6]
173
tail, ranges = self.parse_ranges(ranges_header_value)
174
# Normalize tail into ranges
176
ranges.append((file_size - tail, file_size))
178
self._satisfiable_ranges = True
180
self._satisfiable_ranges = False
182
def check_range(range_specifier):
183
start, end = range_specifier
184
# RFC2616 14.35, ranges are invalid if start >= file_size
185
if start >= file_size:
186
self._satisfiable_ranges = False # Side-effect !
188
# RFC2616 14.35, end values should be truncated
189
# to file_size -1 if they exceed it
190
end = min(end, file_size - 1)
193
ranges = map(check_range, ranges)
195
if not self._satisfiable_ranges:
196
# RFC2616 14.16 and 14.35 says that when a server
197
# encounters unsatisfiable range specifiers, it
198
# SHOULD return a 416.
200
# FIXME: We SHOULD send a Content-Range header too,
201
# but the implementation of send_error does not
202
# allows that. So far.
203
self.send_error(416, "Requested range not satisfiable")
207
(start, end) = ranges[0]
208
self.get_single_range(file, file_size, start, end)
210
self.get_multiple_ranges(file, file_size, ranges)
213
def translate_path(self, path):
214
"""Translate a /-separated PATH to the local filename syntax.
216
If the server requires it, proxy the path before the usual translation
218
if self.server.test_case_server.proxy_requests:
219
# We need to act as a proxy and accept absolute urls,
220
# which SimpleHTTPRequestHandler (parent) is not
221
# ready for. So we just drop the protocol://host:port
222
# part in front of the request-url (because we know
223
# we would not forward the request to *another*
226
# So we do what SimpleHTTPRequestHandler.translate_path
227
# do beginning with python 2.4.3: abandon query
228
# parameters, scheme, host port, etc (which ensure we
229
# provide the right behaviour on all python versions).
230
path = urlparse.urlparse(path)[2]
231
# And now, we can apply *our* trick to proxy files
234
return self._translate_path(path)
236
def _translate_path(self, path):
237
return SimpleHTTPRequestHandler.translate_path(self, path)
239
if sys.platform == 'win32':
240
# On win32 you cannot access non-ascii filenames without
241
# decoding them into unicode first.
242
# However, under Linux, you can access bytestream paths
243
# without any problems. If this function was always active
244
# it would probably break tests when LANG=C was set
245
def _translate_path(self, path):
246
"""Translate a /-separated PATH to the local filename syntax.
248
For bzr, all url paths are considered to be utf8 paths.
249
On Linux, you can access these paths directly over the bytestream
250
request, but on win32, you must decode them, and access them
253
# abandon query parameters
254
path = urlparse.urlparse(path)[2]
255
path = posixpath.normpath(urllib.unquote(path))
256
path = path.decode('utf-8')
257
words = path.split('/')
258
words = filter(None, words)
261
drive, word = os.path.splitdrive(word)
262
head, word = os.path.split(word)
263
if word in (os.curdir, os.pardir): continue
264
path = os.path.join(path, word)
268
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTP server that keeps a reference to the test case server."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: Passed to BaseHTTPServer.HTTPServer.
        :param RequestHandlerClass: Passed to BaseHTTPServer.HTTPServer.
        :param test_case_server: Object stored as ``self.test_case_server``
            for the request handlers to use.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the test cases.
        self.test_case_server = test_case_server

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be in a blocking read, we shutdown the socket
        before closing it.
        """
        self.socket.shutdown(socket.SHUT_RDWR)
        BaseHTTPServer.HTTPServer.server_close(self)


class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server without starting it.

        :param request_handler: The request handler class handed to the
            underlying TestingHTTPServer.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # NOTE(review): the initial port value was missing from the
        # extract this was restored from; 0 is assumed since the real
        # port is read back from the socket in _get_httpd -- confirm.
        self.port = 0
        self._httpd = None
        # Allows tests to verify number of GET requests issued
        self.GET_request_nb = 0

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            # Remember the port the socket is really bound to
            host, self.port = self._httpd.socket.getsockname()
        return self._httpd

    def _http_start(self):
        """Thread target: publish the base url, then serve requests."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Tell setUp that the base url is now available
        self._http_starting.release()

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which ``path`` is served.

        :param path: A local path. Absolute paths must be located below
            the directory recorded by setUp.
        :raises BadWebserverPath: If ``path`` is absolute and not below
            that directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # Report the directory the path was compared against
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Create the log list before the server thread exists so that
        # log() can never run without it.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._httpd.server_close()
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'