1
# Copyright (C) 2006, 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
import SimpleHTTPServer
33
from bzrlib import transport
34
from bzrlib.transport import local
37
class WebserverNotAvailable(Exception):
41
class BadWebserverPath(ValueError):
43
return 'path %s is not in %s' % self.args
46
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
47
"""Handles one request.
49
A TestingHTTPRequestHandler is instantiated for every request
50
received by the associated server.
52
# The Message-like class used to parse the request headers
53
MessageClass = httplib.HTTPMessage
55
def log_message(self, format, *args):
56
tcs = self.server.test_case_server
57
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
58
self.address_string(),
59
self.log_date_time_string(),
61
self.headers.get('referrer', '-'),
62
self.headers.get('user-agent', '-'))
64
def handle_one_request(self):
65
"""Handle a single HTTP request.
67
We catch all socket errors occurring when the client closes the
68
connection early to avoid polluting the test results.
71
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
72
except socket.error, e:
74
and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
75
errno.ECONNABORTED,)):
76
self.close_connection = 1
80
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
81
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
83
def parse_ranges(self, ranges_header):
84
"""Parse the range header value and returns ranges and tail.
86
RFC2616 14.35 says that syntactically invalid range
87
specifiers MUST be ignored. In that case, we return 0 for
88
tail and [] for ranges.
92
if not ranges_header.startswith('bytes='):
93
# Syntactically invalid header
96
ranges_header = ranges_header[len('bytes='):]
97
for range_str in ranges_header.split(','):
98
# FIXME: RFC2616 says end is optional and defaults to file_size
99
range_match = self._range_regexp.match(range_str)
100
if range_match is not None:
101
start = int(range_match.group('start'))
102
end = int(range_match.group('end'))
104
# Syntactically invalid range
106
ranges.append((start, end))
108
tail_match = self._tail_regexp.match(range_str)
109
if tail_match is not None:
110
tail = int(tail_match.group('tail'))
112
# Syntactically invalid range
116
def send_range_content(self, file, start, length):
118
self.wfile.write(file.read(length))
120
def get_single_range(self, file, file_size, start, end):
121
self.send_response(206)
122
length = end - start + 1
123
self.send_header('Accept-Ranges', 'bytes')
124
self.send_header("Content-Length", "%d" % length)
126
self.send_header("Content-Type", 'application/octet-stream')
127
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
131
self.send_range_content(file, start, length)
133
def get_multiple_ranges(self, file, file_size, ranges):
134
self.send_response(206)
135
self.send_header('Accept-Ranges', 'bytes')
136
boundary = "%d" % random.randint(0,0x7FFFFFFF)
137
self.send_header("Content-Type",
138
"multipart/byteranges; boundary=%s" % boundary)
140
for (start, end) in ranges:
141
self.wfile.write("--%s\r\n" % boundary)
142
self.send_header("Content-type", 'application/octet-stream')
143
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
147
self.send_range_content(file, start, end - start + 1)
149
self.wfile.write("--%s\r\n" % boundary)
152
"""Serve a GET request.
154
Handles the Range header.
157
self.server.test_case_server.GET_request_nb += 1
159
path = self.translate_path(self.path)
160
ranges_header_value = self.headers.get('Range')
161
if ranges_header_value is None or os.path.isdir(path):
162
# Let the mother class handle most cases
163
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
166
# Always read in binary mode. Opening files in text
167
# mode may cause newline translations, making the
168
# actual size of the content transmitted *less* than
169
# the content-length!
170
file = open(path, 'rb')
172
self.send_error(404, "File not found")
175
file_size = os.fstat(file.fileno())[6]
176
tail, ranges = self.parse_ranges(ranges_header_value)
177
# Normalize tail into ranges
179
ranges.append((file_size - tail, file_size))
181
self._satisfiable_ranges = True
183
self._satisfiable_ranges = False
185
def check_range(range_specifier):
186
start, end = range_specifier
187
# RFC2616 14.35, ranges are invalid if start >= file_size
188
if start >= file_size:
189
self._satisfiable_ranges = False # Side-effect !
191
# RFC2616 14.35, end values should be truncated
192
# to file_size -1 if they exceed it
193
end = min(end, file_size - 1)
196
ranges = map(check_range, ranges)
198
if not self._satisfiable_ranges:
199
# RFC2616 14.16 and 14.35 says that when a server
200
# encounters unsatisfiable range specifiers, it
201
# SHOULD return a 416.
203
# FIXME: We SHOULD send a Content-Range header too,
204
# but the implementation of send_error does not
205
# allow that. So far.
206
self.send_error(416, "Requested range not satisfiable")
210
(start, end) = ranges[0]
211
self.get_single_range(file, file_size, start, end)
213
self.get_multiple_ranges(file, file_size, ranges)
216
def translate_path(self, path):
217
"""Translate a /-separated PATH to the local filename syntax.
219
If the server requires it, proxy the path before the usual translation
221
if self.server.test_case_server.proxy_requests:
222
# We need to act as a proxy and accept absolute urls,
223
# which SimpleHTTPRequestHandler (parent) is not
224
# ready for. So we just drop the protocol://host:port
225
# part in front of the request-url (because we know
226
# we would not forward the request to *another*
229
# So we do what SimpleHTTPRequestHandler.translate_path
230
# does beginning with python 2.4.3: abandon query
231
# parameters, scheme, host port, etc (which ensures we
232
# provide the right behaviour on all python versions).
233
path = urlparse.urlparse(path)[2]
234
# And now, we can apply *our* trick to proxy files
237
return self._translate_path(path)
239
def _translate_path(self, path):
240
return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(
243
if sys.platform == 'win32':
244
# On win32 you cannot access non-ascii filenames without
245
# decoding them into unicode first.
246
# However, under Linux, you can access bytestream paths
247
# without any problems. If this function was always active
248
# it would probably break tests when LANG=C was set
249
def _translate_path(self, path):
250
"""Translate a /-separated PATH to the local filename syntax.
252
For bzr, all url paths are considered to be utf8 paths.
253
On Linux, you can access these paths directly over the bytestream
254
request, but on win32, you must decode them, and access them
257
# abandon query parameters
258
path = urlparse.urlparse(path)[2]
259
path = posixpath.normpath(urllib.unquote(path))
260
path = path.decode('utf-8')
261
words = path.split('/')
262
words = filter(None, words)
265
drive, word = os.path.splitdrive(word)
266
head, word = os.path.split(word)
267
if word in (os.curdir, os.pardir): continue
268
path = os.path.join(path, word)
272
class TestingHTTPServerWrapper(object):
    """Isolate the wrapper itself to make the server use transparent.

    The wrapper builds the real server from ``server_class`` and forwards
    every attribute it does not define itself to that server.  Daughter
    classes can override any method and/or directly use the ``_server``
    attribute.
    """

    def __init__(self, server_class, test_case_server,
                 server_address, request_handler_class):
        """Create the real server and attach ``test_case_server`` to it.

        :param server_class: Callable building the real server from
            ``(server_address, request_handler_class)``.
        :param test_case_server: Object stored on the real server as its
            ``test_case_server`` attribute.
        :param server_address: Address handed to ``server_class``.
        :param request_handler_class: Request handler class handed to
            ``server_class``.
        """
        self._server = server_class(server_address, request_handler_class)
        # test_case_server can be used to communicate between the tests and
        # the server (or the request handler and the server), allowing
        # dynamic behaviors to be defined.
        self._server.test_case_server = test_case_server

    def __getattr__(self, name):
        """Forward the lookup of attributes not found on the wrapper.

        :param name: Name of the attribute being looked up.
        :return: The attribute of the wrapped server.
        :raises AttributeError: If the wrapped server has no such attribute,
            or if the wrapper has no ``_server`` yet.
        """
        if name == '_server':
            # __getattr__ is only called for '_server' when that attribute
            # is not set (e.g. the instance was created without __init__).
            # Looking it up through self would call __getattr__ again and
            # recurse without bound, so fail cleanly instead.
            raise AttributeError(name)
        return getattr(self._server, name)

    def server_bind(self):
        """Override server_bind to store the server name."""
        self._server.server_bind()
        host, port = self._server.socket.getsockname()[:2]
        self._server.server_name = socket.getfqdn(host)
        self._server.server_port = port

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be (surely is, even) in a blocking listen, we
        shutdown its socket before closing it.  The server is closed even
        when the shutdown fails; the shutdown error is still propagated.
        """
        # Note that this is executed as part of the implicit tear down in the
        # main thread while the server runs in its own thread. The clean way
        # to tear down the server would be to instruct it to stop accepting
        # connections and wait for the current connection to end naturally. To
        # end the connection naturally, the http transports should close their
        # socket when they do not need to talk to the server anymore. We
        # don't want to impose such a constraint on the http transports (and
        # we can't anyway ;). So we must tear down here, from the main thread,
        # when the test has ended. Note that since the server is in a
        # blocking operation and since python uses select internally, shutting
        # down the socket is reliable and relatively clean.
        try:
            self._server.socket.shutdown(socket.SHUT_RDWR)
        finally:
            # Let the server properly close the socket, even if the shutdown
            # above raised: otherwise the listening socket would leak.
            self._server.server_close()
319
class TestingHTTPServer(TestingHTTPServerWrapper):
    """Test server wrapping a plain ``SocketServer.TCPServer``."""

    def __init__(self, server_address, request_handler_class, test_case_server):
        """Build the wrapped TCPServer for the given address and handler."""
        wrapper_init = super(TestingHTTPServer, self).__init__
        wrapper_init(SocketServer.TCPServer, test_case_server,
                     server_address, request_handler_class)
327
class TestingThreadingHTTPServer(TestingHTTPServerWrapper):
    """A threading HTTP test server for HTTP 1.1.

    Tests may open several concurrent connections to the same http server,
    and each of them needs an independent connection.  This is achieved by
    spawning a new thread for each connection.
    """

    def __init__(self, server_address, request_handler_class, test_case_server):
        """Build the wrapped ThreadingTCPServer and set its thread policy."""
        wrapper_init = super(TestingThreadingHTTPServer, self).__init__
        wrapper_init(SocketServer.ThreadingTCPServer, test_case_server,
                     server_address, request_handler_class)
        # daemon_threads decides how the connection threads act upon
        # termination of the main process. Setting it is prophylactic.
        self._server.daemon_threads = True
345
class HttpServer(transport.Server):
346
"""A test server for http transports.
348
Subclasses can provide a specific request handler.
351
# The real servers depending on the protocol
352
http_server_class = {'HTTP/1.0': TestingHTTPServer,
353
'HTTP/1.1': TestingThreadingHTTPServer,
356
# Whether or not we proxy the requests (see
357
# TestingHTTPRequestHandler.translate_path).
358
proxy_requests = False
360
# used to form the url that connects to this server
361
_url_protocol = 'http'
363
# Subclasses can provide a specific request handler
364
def __init__(self, request_handler=TestingHTTPRequestHandler):
365
transport.Server.__init__(self)
366
self.request_handler = request_handler
367
self.host = 'localhost'
370
# Allows tests to verify number of GET requests issued
371
self.GET_request_nb = 0
373
def _get_httpd(self):
374
if self._httpd is None:
375
rhandler = self.request_handler
376
proto_vers = rhandler.protocol_version
377
# Create the appropriate server for the required protocol
378
serv_cls = self.http_server_class.get(proto_vers, None)
380
raise httplib.UnknownProtocol(proto_vers)
382
self._httpd = serv_cls((self.host, self.port), rhandler, self)
383
host, self.port = self._httpd.socket.getsockname()
386
def _http_start(self):
387
"""Server thread main entry point. """
388
self._http_running = False
391
httpd = self._get_httpd()
392
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
393
self.host, self.port)
394
self._http_running = True
396
# Whatever goes wrong, we save the exception for the main
397
# thread. Note that since we are running in a thread, no signal
398
# can be received, so we don't care about KeyboardInterrupt.
399
self._http_exception = sys.exc_info()
401
# Release the lock or the main thread will block and the whole
403
self._http_starting.release()
405
# From now on, exceptions are taken care of by the
406
# SocketServer.BaseServer or the request handler.
407
while self._http_running:
409
# Really an HTTP connection but the python framework is generic
410
# and call them requests
411
httpd.handle_request()
412
except socket.timeout:
415
def _get_remote_url(self, path):
416
path_parts = path.split(os.path.sep)
417
if os.path.isabs(path):
418
if path_parts[:len(self._local_path_parts)] != \
419
self._local_path_parts:
420
raise BadWebserverPath(path, self.test_dir)
421
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
423
remote_path = '/'.join(path_parts)
425
return self._http_base_url + remote_path
427
def log(self, format, *args):
    """Record one line of server log output in ``self.logs``.

    :param format: %-style format string.
    :param args: Values interpolated into ``format``.
    """
    message = format % args
    self.logs.append(message)
431
def setUp(self, backing_transport_server=None):
432
"""See bzrlib.transport.Server.setUp.
434
:param backing_transport_server: The transport that requests over this
435
protocol should be forwarded to. Note that this is currently not
438
# XXX: TODO: make the server back onto vfs_server rather than local
440
assert backing_transport_server is None or \
441
isinstance(backing_transport_server, local.LocalURLServer), \
442
"HTTPServer currently assumes local transport, got %s" % \
443
backing_transport_server
444
self._home_dir = os.getcwdu()
445
self._local_path_parts = self._home_dir.split(os.path.sep)
446
self._http_base_url = None
448
# Create the server thread
449
self._http_starting = threading.Lock()
450
self._http_starting.acquire()
451
self._http_thread = threading.Thread(target=self._http_start)
452
self._http_thread.setDaemon(True)
453
self._http_exception = None
454
self._http_thread.start()
456
# Wait for the server thread to start (i.e release the lock)
457
self._http_starting.acquire()
459
if self._http_exception is not None:
460
exc_class, exc_value, exc_tb = self._http_exception
461
raise exc_class, exc_value, exc_tb
462
self._http_starting.release()
466
"""See bzrlib.transport.Server.tearDown."""
467
self._httpd.server_close()
468
self._http_running = False
469
self._http_thread.join()
472
"""See bzrlib.transport.Server.get_url."""
473
return self._get_remote_url(self._home_dir)
475
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # A literal loopback address and port 1 are chosen to try to prevent
    # trouble with proxies, weird dns, and the like.
    return ''.join((self._url_protocol, '://127.0.0.1:1/'))
482
class HttpServer_urllib(HttpServer):
    """HttpServer variant handing out http+urllib urls.

    Meant for testing: connections to this server will always go through
    urllib where possible.
    """

    # Scheme used to form the urls returned by this server; it makes them
    # require the urllib client implementation.
    _url_protocol = 'http+urllib'
493
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant handing out http+pycurl urls.

    Meant for testing: connections to this server will always go through
    pycurl where possible.
    """

    # pycurl availability is deliberately not checked here: this server
    # will be required only when pycurl is present.

    # Scheme used to form the urls returned by this server; it makes them
    # require the pycurl client implementation.
    _url_protocol = 'http+pycurl'