1
# Copyright (C) 2006-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
import SimpleHTTPServer
31
from . import test_server
34
class BadWebserverPath(ValueError):
36
return 'path %s is not in %s' % self.args
39
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
40
"""Handles one request.
42
A TestingHTTPRequestHandler is instantiated for every request received by
43
the associated server. Note that 'request' here is inherited from the base
44
TCPServer class, for the HTTP server it is really a connection which itself
45
will handle one or several HTTP requests.
47
# Default protocol version
48
protocol_version = 'HTTP/1.1'
50
# The Message-like class used to parse the request headers
51
MessageClass = httplib.HTTPMessage
54
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
55
self._cwd = self.server._home_dir
56
tcs = self.server.test_case_server
57
if tcs.protocol_version is not None:
58
# If the test server forced a protocol version, use it
59
self.protocol_version = tcs.protocol_version
61
def log_message(self, format, *args):
62
tcs = self.server.test_case_server
63
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
64
self.address_string(),
65
self.log_date_time_string(),
67
self.headers.get('referer', '-'),
68
self.headers.get('user-agent', '-'))
71
SimpleHTTPServer.SimpleHTTPRequestHandler.handle(self)
72
# Some clients (pycurl, I'm looking at you) are more picky than others
73
# and require that the socket itself is closed
74
# (SocketServer.StreamRequestHandler only closes the two associated
76
self.connection.close()
78
def handle_one_request(self):
79
"""Handle a single HTTP request.
81
We catch all socket errors occurring when the client closes the
82
connection early to avoid polluting the test results.
85
self._handle_one_request()
86
except socket.error as e:
87
# Any socket error should close the connection, but some errors are
88
# due to the client closing early and we don't want to pollute test
89
# results, so we raise only the others.
90
self.close_connection = 1
92
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
93
errno.ECONNABORTED, errno.EBADF)):
96
error_content_type = 'text/plain'
97
error_message_format = '''\
102
def send_error(self, code, message=None):
103
"""Send and log an error reply.
105
We redefine the python-provided version to be able to set a
106
``Content-Length`` header as some http/1.1 clients complain otherwise
109
:param code: The HTTP error code.
111
:param message: The explanation of the error code. Defaults to a short
117
message = self.responses[code][0]
120
self.log_error("code %d, message %s", code, message)
121
content = (self.error_message_format %
122
{'code': code, 'message': message})
123
self.send_response(code, message)
124
self.send_header("Content-Type", self.error_content_type)
125
self.send_header("Content-Length", "%d" % len(content))
126
self.send_header('Connection', 'close')
128
if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
129
self.wfile.write(content)
131
def _handle_one_request(self):
    """Serve one HTTP request with the stock SimpleHTTPRequestHandler logic.

    This is the unguarded worker called by handle_one_request(); nothing is
    caught here and the base class result is not returned.
    """
    base_handler = SimpleHTTPServer.SimpleHTTPRequestHandler
    base_handler.handle_one_request(self)
134
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)?$')
135
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
137
def _parse_ranges(self, ranges_header, file_size):
138
"""Parse the range header value and return the ranges.
140
RFC2616 14.35 says that syntactically invalid range specifiers MUST be
141
ignored. In that case, we return None instead of a range list.
143
:param ranges_header: The 'Range' header value.
145
:param file_size: The size of the requested file.
147
:return: A list of (start, end) tuples or None if some invalid range
148
specifier is encountered.
150
if not ranges_header.startswith('bytes='):
151
# Syntactically invalid header
156
ranges_header = ranges_header[len('bytes='):]
157
for range_str in ranges_header.split(','):
158
range_match = self._range_regexp.match(range_str)
159
if range_match is not None:
160
start = int(range_match.group('start'))
161
end_match = range_match.group('end')
162
if end_match is None:
163
# RFC2616 says end is optional and defaults to file_size
168
# Syntactically invalid range
170
ranges.append((start, end))
172
tail_match = self._tail_regexp.match(range_str)
173
if tail_match is not None:
174
tail = int(tail_match.group('tail'))
176
# Syntactically invalid range
179
# Normalize tail into ranges
180
ranges.append((max(0, file_size - tail), file_size))
183
for start, end in ranges:
184
if start >= file_size:
185
# RFC2616 14.35, ranges are invalid if start >= file_size
187
# RFC2616 14.35, end values should be truncated
188
# to file_size -1 if they exceed it
189
end = min(end, file_size - 1)
190
checked_ranges.append((start, end))
191
return checked_ranges
193
def _header_line_length(self, keyword, value):
194
header_line = '%s: %s\r\n' % (keyword, value)
195
return len(header_line)
198
"""Overrides base implementation to work around a bug in python2.5."""
199
path = self.translate_path(self.path)
200
if os.path.isdir(path) and not self.path.endswith('/'):
201
# redirect browser - doing basically what apache does when
202
# DirectorySlash option is On which is quite common (braindead, but
204
self.send_response(301)
205
self.send_header("Location", self.path + "/")
206
# Indicates that the body is empty for HTTP/1.1 clients
207
self.send_header('Content-Length', '0')
211
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
213
def send_range_content(self, file, start, length):
215
self.wfile.write(file.read(length))
217
def get_single_range(self, file, file_size, start, end):
218
self.send_response(206)
219
length = end - start + 1
220
self.send_header('Accept-Ranges', 'bytes')
221
self.send_header("Content-Length", "%d" % length)
223
self.send_header("Content-Type", 'application/octet-stream')
224
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
228
self.send_range_content(file, start, length)
230
def get_multiple_ranges(self, file, file_size, ranges):
231
self.send_response(206)
232
self.send_header('Accept-Ranges', 'bytes')
233
boundary = '%d' % random.randint(0,0x7FFFFFFF)
234
self.send_header('Content-Type',
235
'multipart/byteranges; boundary=%s' % boundary)
236
boundary_line = '--%s\r\n' % boundary
237
# Calculate the Content-Length
239
for (start, end) in ranges:
240
content_length += len(boundary_line)
241
content_length += self._header_line_length(
242
'Content-type', 'application/octet-stream')
243
content_length += self._header_line_length(
244
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
245
content_length += len('\r\n') # end headers
246
content_length += end - start + 1
247
content_length += len(boundary_line)
248
self.send_header('Content-length', content_length)
251
# Send the multipart body
252
for (start, end) in ranges:
253
self.wfile.write(boundary_line)
254
self.send_header('Content-type', 'application/octet-stream')
255
self.send_header('Content-Range', 'bytes %d-%d/%d'
256
% (start, end, file_size))
258
self.send_range_content(file, start, end - start + 1)
260
self.wfile.write(boundary_line)
263
"""Serve a GET request.
265
Handles the Range header.
268
self.server.test_case_server.GET_request_nb += 1
270
path = self.translate_path(self.path)
271
ranges_header_value = self.headers.get('Range')
272
if ranges_header_value is None or os.path.isdir(path):
273
# Let the mother class handle most cases
274
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
277
# Always read in binary mode. Opening files in text
278
# mode may cause newline translations, making the
279
# actual size of the content transmitted *less* than
280
# the content-length!
283
self.send_error(404, "File not found")
286
file_size = os.fstat(f.fileno())[6]
287
ranges = self._parse_ranges(ranges_header_value, file_size)
289
# RFC2616 14.16 and 14.35 say that when a server
290
# encounters unsatisfiable range specifiers, it
291
# SHOULD return a 416.
293
# FIXME: We SHOULD send a Content-Range header too,
294
# but the implementation of send_error does not
295
# allow that. So far.
296
self.send_error(416, "Requested range not satisfiable")
300
(start, end) = ranges[0]
301
self.get_single_range(f, file_size, start, end)
303
self.get_multiple_ranges(f, file_size, ranges)
306
def translate_path(self, path):
307
"""Translate a /-separated PATH to the local filename syntax.
309
If the server requires it, proxy the path before the usual translation
311
if self.server.test_case_server.proxy_requests:
312
# We need to act as a proxy and accept absolute urls,
313
# which SimpleHTTPRequestHandler (parent) is not
314
# ready for. So we just drop the protocol://host:port
315
# part in front of the request-url (because we know
316
# we would not forward the request to *another*
319
# So we do what SimpleHTTPRequestHandler.translate_path
320
# does beginning with python 2.4.3: abandon query
321
# parameters, scheme, host, port, etc. (which ensures we
322
# provide the right behaviour on all python versions).
323
path = urlparse.urlparse(path)[2]
324
# And now, we can apply *our* trick to proxy files
327
return self._translate_path(path)
329
def _translate_path(self, path):
330
"""Translate a /-separated PATH to the local filename syntax.
332
Note that we're translating http URLs here, not file URLs.
333
The URL root location is the server's startup directory.
334
Components that mean special things to the local file system
335
(e.g. drive or directory names) are ignored. (XXX They should
336
probably be diagnosed.)
338
Override from python standard library to stop it calling os.getcwd()
340
# abandon query parameters
341
path = urlparse.urlparse(path)[2]
342
path = posixpath.normpath(urlutils.unquote(path))
343
path = path.decode('utf-8')
344
words = path.split('/')
346
for num, word in enumerate(w for w in words if w):
348
drive, word = os.path.splitdrive(word)
349
head, word = os.path.split(word)
350
if word in (os.curdir, os.pardir): continue
351
path = os.path.join(path, word)
355
class TestingHTTPServerMixin:
    """Mixin storing the test case server on the real server object."""

    def __init__(self, test_case_server):
        """Remember the test case server and its home directory.

        :param test_case_server: The object shared between the tests, the
            server and the request handlers; it must provide a ``_home_dir``
            attribute.
        """
        # The request handlers reach this object through
        # self.server.test_case_server, which lets the tests define the
        # server behaviour dynamically.
        tcs = test_case_server
        self.test_case_server = tcs
        self._home_dir = tcs._home_dir
366
class TestingHTTPServer(test_server.TestingTCPServer, TestingHTTPServerMixin):
368
def __init__(self, server_address, request_handler_class,
370
test_server.TestingTCPServer.__init__(self, server_address,
371
request_handler_class)
372
TestingHTTPServerMixin.__init__(self, test_case_server)
375
class TestingThreadingHTTPServer(test_server.TestingThreadingTCPServer,
376
TestingHTTPServerMixin):
377
"""A threading HTTP test server for HTTP 1.1.
379
Since tests can initiate several concurrent connections to the same http
380
server, we need an independent connection for each of them. We achieve that
381
by spawning a new thread for each connection.
383
def __init__(self, server_address, request_handler_class,
385
test_server.TestingThreadingTCPServer.__init__(self, server_address,
386
request_handler_class)
387
TestingHTTPServerMixin.__init__(self, test_case_server)
390
class HttpServer(test_server.TestingTCPServerInAThread):
391
"""A test server for http transports.
393
Subclasses can provide a specific request handler.
396
# The real servers depending on the protocol
397
http_server_class = {'HTTP/1.0': TestingHTTPServer,
398
'HTTP/1.1': TestingThreadingHTTPServer,
401
# Whether or not we proxy the requests (see
402
# TestingHTTPRequestHandler.translate_path).
403
proxy_requests = False
405
# used to form the url that connects to this server
406
_url_protocol = 'http'
408
def __init__(self, request_handler=TestingHTTPRequestHandler,
409
protocol_version=None):
412
:param request_handler: a class that will be instantiated to handle an
413
http connection (one or several requests).
415
:param protocol_version: if specified, will override the protocol
416
version of the request handler.
418
# Depending on the protocol version, we will create the appropriate
420
if protocol_version is None:
421
# Use the request handler one
422
proto_vers = request_handler.protocol_version
424
# Use our own, it will be used to override the request handler
426
proto_vers = protocol_version
427
# Get the appropriate server class for the required protocol
428
serv_cls = self.http_server_class.get(proto_vers, None)
430
raise httplib.UnknownProtocol(proto_vers)
431
self.host = 'localhost'
433
super(HttpServer, self).__init__((self.host, self.port),
436
self.protocol_version = proto_vers
437
# Allows tests to verify number of GET requests issued
438
self.GET_request_nb = 0
439
self._http_base_url = None
442
def create_server(self):
    """Instantiate the real server for this test server.

    :return: A new ``self.server_class`` instance built from the
        (host, port) address, the request handler class and this object.
    """
    build_server = self.server_class
    address = (self.host, self.port)
    return build_server(address, self.request_handler_class, self)
446
def _get_remote_url(self, path):
447
path_parts = path.split(os.path.sep)
448
if os.path.isabs(path):
449
if path_parts[:len(self._local_path_parts)] != \
450
self._local_path_parts:
451
raise BadWebserverPath(path, self.test_dir)
452
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
454
remote_path = '/'.join(path_parts)
456
return self._http_base_url + remote_path
458
def log(self, format, *args):
    """Record one server log entry.

    :param format: A %-style format string.

    :param args: The values interpolated into ``format``.
    """
    record = self.logs.append
    record(format % args)
462
def start_server(self, backing_transport_server=None):
463
"""See breezy.transport.Server.start_server.
465
:param backing_transport_server: The transport that requests over this
466
protocol should be forwarded to. Note that this is currently not
469
# XXX: TODO: make the server back onto vfs_server rather than local
471
if not (backing_transport_server is None
472
or isinstance(backing_transport_server,
473
test_server.LocalURLServer)):
474
raise AssertionError(
475
"HTTPServer currently assumes local transport, got %s" %
476
backing_transport_server)
477
self._home_dir = osutils.getcwd()
478
self._local_path_parts = self._home_dir.split(os.path.sep)
481
super(HttpServer, self).start_server()
482
self._http_base_url = '%s://%s:%s/' % (
483
self._url_protocol, self.host, self.port)
486
"""See breezy.transport.Server.get_url."""
487
return self._get_remote_url(self._home_dir)
489
def get_bogus_url(self):
    """See breezy.transport.Server.get_bogus_url."""
    # Port 1 on the loopback address, picked to try to avoid trouble with
    # proxies and weird DNS setups.
    return ''.join((self._url_protocol, '://127.0.0.1:1/'))
496
class HttpServer_urllib(HttpServer):
497
"""Subclass of HttpServer that gives http+urllib urls.
499
This is for use in testing: connections to this server will always go
500
through urllib where possible.
503
# urls returned by this server should require the urllib client impl
504
_url_protocol = 'http+urllib'
507
class HttpServer_PyCurl(HttpServer):
508
"""Subclass of HttpServer that gives http+pycurl urls.
510
This is for use in testing: connections to this server will always go
511
through pycurl where possible.
514
# We don't care about checking the pycurl availability as
515
# this server will be required only when pycurl is present
517
# urls returned by this server should require the pycurl client impl
518
_url_protocol = 'http+pycurl'