# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
import errno
import httplib
import os
import posixpath
import random
import re
import SimpleHTTPServer
import socket
import SocketServer
import sys
import threading
import urllib
import urlparse

from bzrlib import transport
from bzrlib.transport import local
37
class WebserverNotAvailable(Exception):
    """Error type signalling that a web server is not available.

    NOTE(review): no class body is visible for this definition; a plain
    exception with no extra behaviour is assumed -- confirm against the
    original file.
    """
41
class BadWebserverPath(ValueError):
    """A path that cannot be mapped to a URL of the test web server.

    Instances are created with two positional arguments: the offending path
    and the directory it was expected to live in.
    """

    def __str__(self):
        # self.args is the (path, directory) pair given to the constructor
        return 'path %s is not in %s' % self.args
46
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    The handler reaches the test server that owns it through
    ``self.server.test_case_server`` and uses it for logging, for counting
    GET requests and for the protocol-version and proxy switches.
    """

    # The Message-like class used to parse the request headers
    MessageClass = httplib.HTTPMessage

    def setup(self):
        """Set the handler up, honouring a protocol version forced by tests."""
        SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
        tcs = self.server.test_case_server
        if tcs.protocol_version is not None:
            # If the test server forced a protocol version, use it
            self.protocol_version = tcs.protocol_version

    def log_message(self, format, *args):
        """Forward a log entry to the test server's ``log`` method.

        :param format: %-style format string of the message.
        :param args: values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client close the
        connection early to avoid polluting the test results.

        :raises socket.error: for any other socket failure.
        """
        try:
            SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # sys.exc_info() instead of binding the exception in the except
            # clause: this spelling is accepted by every Python version.
            e = sys.exc_info()[1]
            if (len(e.args) > 0
                and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
                                  errno.ECONNABORTED,)):
                # The client went away, the connection is finished
                self.close_connection = 1
            else:
                raise

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: the value of the Range header.
        :return: a ``(tail, ranges)`` tuple where ``tail`` is the length of
            the requested suffix (0 if none) and ``ranges`` a list of
            ``(start, end)`` tuples.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response carrying the single range ``start``-``end``.

        :param file: the opened file to serve.
        :param file_size: total size of the file, reported in Content-Range.
        :param start: offset of the first byte to send.
        :param end: offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        # The headers must be terminated before any content is written
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range.

        :param file: the opened file to serve.
        :param file_size: total size of the file, reported in Content-Range.
        :param ranges: list of ``(start, end)`` tuples, ``end`` inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Final boundary, written once all the parts have been sent
        self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header and
        requests for directories are left to the parent class. A file that
        cannot be opened yields a 404, unsatisfiable ranges yield a 416.
        """
        # Update statistics
        self.server.test_case_server.GET_request_nb += 1

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        # try/finally so that the file is closed whatever happens while
        # the response is produced.
        try:
            file_size = os.fstat(file.fileno()).st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # RFC2616 14.35.1: a suffix longer than the entity selects
                # the whole entity, so the start never goes below 0.
                ranges.append((max(0, file_size - tail), file_size))

            self._satisfiable_ranges = True
            if len(ranges) == 0:
                self._satisfiable_ranges = False
            else:
                def check_range(range_specifier):
                    start, end = range_specifier
                    # RFC2616 14.35, ranges are invalid if start >= file_size
                    if start >= file_size:
                        self._satisfiable_ranges = False # Side-effect !
                        return 0, 0
                    # RFC2616 14.35, end values should be truncated
                    # to file_size -1 if they exceed it
                    end = min(end, file_size - 1)
                    return start, end

                # A real list is needed: its length is tested below
                ranges = [check_range(spec) for spec in ranges]

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.

                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, ranges)
        finally:
            file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            # NOTE(review): the statement applying that trick is not part
            # of the text this method was rebuilt from; confirm against
            # version control before relying on proxied paths.

        return self._translate_path(path)

    def _translate_path(self, path):
        """Delegate the translation to the parent class."""
        return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(
            self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as unicode.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            # NOTE(review): the starting directory is assumed to be the
            # current one, as a unicode string -- confirm.
            path = os.getcwdu()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
279
class TestingHTTPServerWrapper(object):
    """Isolate the wrapper itself to make the server use transparent.

    Daughter classes can override any method and/or directly call the _server
    attribute.
    """

    def __init__(self, server_class, test_case_server,
                 server_address, request_handler_class):
        """Build the wrapped server.

        :param server_class: class of the real server; it is called with
            ``(server_address, request_handler_class)``.
        :param test_case_server: object stored on the real server as its
            ``test_case_server`` attribute.
        :param server_address: address handed to ``server_class``.
        :param request_handler_class: handler class handed to
            ``server_class``.
        """
        self._server = server_class(server_address, request_handler_class)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests.
        self._server.test_case_server = test_case_server

    def __getattr__(self, name):
        # Only called for attributes the wrapper does not define itself:
        # everything else is answered by the wrapped server.
        return getattr(self._server, name)

    def server_bind(self):
        """Override server_bind to store the server name."""
        self._server.server_bind()
        host, port = self._server.socket.getsockname()[:2]
        self._server.server_name = socket.getfqdn(host)
        self._server.server_port = port

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be (surely is, even) in a blocking listen, we
        shutdown its socket before closing it.
        """
        # Note that is this executed as part of the implicit tear down in the
        # main thread while the server runs in its own thread. The clean way
        # to tear down the server will be to instruct him to stop accepting
        # connections and wait for the current connection to end naturally. To
        # end the connection naturally, the http transports should close their
        # socket when they do not need to talk to the server anymore. We
        # don't want to impose such a constraint on the http transports (and
        # we can't anyway ;). So we must tear down here, from the main thread,
        # when the test have ended. Note that since the server is in a
        # blocking operation and since python use select internally, shutting
        # down the socket is reliable and relatively clean.
        try:
            self._server.socket.shutdown(socket.SHUT_RDWR)
        finally:
            # Let the server properly close the socket, even when the
            # shutdown itself failed (the failure is still propagated).
            self._server.server_close()
326
class TestingHTTPServer(TestingHTTPServerWrapper):
    """Test server wrapping a plain ``SocketServer.TCPServer``."""

    def __init__(self, server_address, request_handler_class, test_case_server):
        # The wrapper takes the real server class first, then the test
        # server, then what the real server itself is built from.
        wrapper = super(TestingHTTPServer, self)
        wrapper.__init__(SocketServer.TCPServer,
                         test_case_server,
                         server_address,
                         request_handler_class)
334
class TestingThreadingHTTPServer(TestingHTTPServerWrapper):
    """A threading HTTP test server for HTTP 1.1.

    Since tests can initiate several concurrent connections to the same http
    server, we need an independent connection for each of them. We achieve that
    by spawning a new thread for each connection.
    """

    def __init__(self, server_address, request_handler_class, test_case_server):
        super(TestingThreadingHTTPServer, self).__init__(
            SocketServer.ThreadingTCPServer, test_case_server,
            server_address, request_handler_class)
        # Decides how threads will act upon termination of the main
        # process. This is prophylactic as we should not leave the threads
        # lying around.
        self._server.daemon_threads = True
352
class HttpServer(transport.Server):
353
"""A test server for http transports.
355
Subclasses can provide a specific request handler.
358
# The real servers depending on the protocol
359
http_server_class = {'HTTP/1.0': TestingHTTPServer,
360
'HTTP/1.1': TestingThreadingHTTPServer,
363
# Whether or not we proxy the requests (see
364
# TestingHTTPRequestHandler.translate_path).
365
proxy_requests = False
367
# used to form the url that connects to this server
368
_url_protocol = 'http'
370
# Subclasses can provide a specific request handler
371
def __init__(self, request_handler=TestingHTTPRequestHandler,
372
protocol_version=None):
375
:param request_handler: a class that will be instantiated to handle an
376
http connection (one or several requests).
378
:param protocol_version: if specified, will override the protocol
379
version of the request handler.
381
transport.Server.__init__(self)
382
self.request_handler = request_handler
383
self.host = 'localhost'
386
self.protocol_version = protocol_version
387
# Allows tests to verify number of GET requests issued
388
self.GET_request_nb = 0
390
def _get_httpd(self):
391
if self._httpd is None:
392
rhandler = self.request_handler
393
if self.protocol_version is None:
394
proto_vers = rhandler.protocol_version
396
proto_vers = self.protocol_version
397
# Create the appropriate server for the required protocol
398
serv_cls = self.http_server_class.get(proto_vers, None)
400
raise httplib.UnknownProtocol(proto_vers)
402
self._httpd = serv_cls((self.host, self.port), rhandler, self)
403
host, self.port = self._httpd.socket.getsockname()
406
def _http_start(self):
407
"""Server thread main entry point. """
408
self._http_running = False
411
httpd = self._get_httpd()
412
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
413
self.host, self.port)
414
self._http_running = True
416
# Whatever goes wrong, we save the exception for the main
417
# thread. Note that since we are running in a thread, no signal
418
# can be received, so we don't care about KeyboardInterrupt.
419
self._http_exception = sys.exc_info()
421
# Release the lock or the main thread will block and the whole
423
self._http_starting.release()
425
# From now on, exceptions are taken care of by the
426
# SocketServer.BaseServer or the request handler.
427
while self._http_running:
429
# Really an HTTP connection but the python framework is generic
430
# and call them requests
431
httpd.handle_request()
432
except socket.timeout:
435
def _get_remote_url(self, path):
436
path_parts = path.split(os.path.sep)
437
if os.path.isabs(path):
438
if path_parts[:len(self._local_path_parts)] != \
439
self._local_path_parts:
440
raise BadWebserverPath(path, self.test_dir)
441
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
443
remote_path = '/'.join(path_parts)
445
return self._http_base_url + remote_path
447
def log(self, format, *args):
448
"""Capture Server log output."""
449
self.logs.append(format % args)
451
def setUp(self, backing_transport_server=None):
452
"""See bzrlib.transport.Server.setUp.
454
:param backing_transport_server: The transport that requests over this
455
protocol should be forwarded to. Note that this is currently not
458
# XXX: TODO: make the server back onto vfs_server rather than local
460
assert backing_transport_server is None or \
461
isinstance(backing_transport_server, local.LocalURLServer), \
462
"HTTPServer currently assumes local transport, got %s" % \
463
backing_transport_server
464
self._home_dir = os.getcwdu()
465
self._local_path_parts = self._home_dir.split(os.path.sep)
466
self._http_base_url = None
468
# Create the server thread
469
self._http_starting = threading.Lock()
470
self._http_starting.acquire()
471
self._http_thread = threading.Thread(target=self._http_start)
472
self._http_thread.setDaemon(True)
473
self._http_exception = None
474
self._http_thread.start()
476
# Wait for the server thread to start (i.e release the lock)
477
self._http_starting.acquire()
479
if self._http_exception is not None:
480
exc_class, exc_value, exc_tb = self._http_exception
481
raise exc_class, exc_value, exc_tb
482
self._http_starting.release()
486
"""See bzrlib.transport.Server.tearDown."""
487
self._httpd.server_close()
488
self._http_running = False
489
self._http_thread.join()
492
"""See bzrlib.transport.Server.get_url."""
493
return self._get_remote_url(self._home_dir)
495
def get_bogus_url(self):
496
"""See bzrlib.transport.Server.get_bogus_url."""
497
# this is chosen to try to prevent trouble with proxies, weird dns,
499
return self._url_protocol + '://127.0.0.1:1/'
502
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
513
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'