# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
import socket
import sys
import threading
import urllib
import urlparse
from SimpleHTTPServer import SimpleHTTPRequestHandler

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer
35
class WebserverNotAvailable(Exception):
    """Signal that a web server is not available.

    The class adds nothing to ``Exception``; it only provides a distinct
    type to raise and catch.
    """
39
class BadWebserverPath(ValueError):
    """A local path lies outside the directory served by the web server.

    Instances are expected to be built with exactly two arguments, the
    offending path and the served directory, in that order; both are
    interpolated into the message.
    """

    def __str__(self):
        """Return 'path <path> is not in <directory>'."""
        return 'path %s is not in %s' % self.args
44
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    The handler cooperates with ``self.server.test_case_server``: it logs
    through its ``log`` method, increments its ``GET_request_nb`` counter
    and honours its ``proxy_requests`` flag.
    """

    def log_message(self, format, *args):
        """Send the formatted request log line to the test case server.

        :param format: %-style format string supplied by the base handler.
        :param args: Values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client closes the
        connection early to avoid polluting the test results.  Any other
        socket error is re-raised.
        """
        try:
            SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # The exception is fetched through sys.exc_info() so that the
            # clause parses on every Python version.
            e = sys.exc_info()[1]
            if (len(e.args) > 0
                and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
                                  errno.ECONNABORTED,)):
                # The client went away: just stop serving this connection.
                self.close_connection = 1
            else:
                raise

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and return tail and ranges.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: The raw value of the Range header.
        :return: A ``(tail, ranges)`` tuple where ``tail`` is the length of
            the requested suffix (0 if none) and ``ranges`` is a list of
            inclusive ``(start, end)`` byte offsets.
        """
        prefix = 'bytes='
        if not ranges_header.startswith(prefix):
            # Syntactically invalid header
            return 0, []

        tail = 0
        ranges = []
        for range_str in ranges_header[len(prefix):].split(','):
            # Whitespace is allowed around the list separators.
            range_str = range_str.strip()
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is None:
                # Syntactically invalid range
                return 0, []
            tail = int(tail_match.group('tail'))
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Answer with a 206 response carrying one inclusive byte range."""
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range",
                         "bytes %d-%d/%d" % (start, end, file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Answer with a 206 multipart/byteranges response.

        Each range is sent as its own part, introduced by the boundary and
        its own Content-type/Content-Range headers.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range",
                             "bytes %d-%d/%d" % (start, end, file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Final boundary.
        # NOTE(review): the original indentation was lost; this write is
        # assumed to follow the loop rather than every part -- confirm.
        self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.  Requests without a Range header, and
        requests for directories, are delegated to the parent class.
        """
        # Update statistics
        self.server.test_case_server.GET_request_nb += 1

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            served = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        try:
            self._serve_ranges(served, ranges_header_value)
        finally:
            # Never leak the file, even if the client hangs up mid-answer.
            served.close()

    def _serve_ranges(self, file, ranges_header_value):
        """Answer a ranged GET for the already opened ``file``."""
        file_size = os.fstat(file.fileno()).st_size
        tail, ranges = self.parse_ranges(ranges_header_value)
        # Normalize tail into ranges
        if tail != 0:
            # RFC2616 14.35: a suffix longer than the file selects the
            # whole file, so never start before offset 0.
            ranges.append((max(0, file_size - tail), file_size))

        satisfiable = len(ranges) > 0
        checked = []
        for start, end in ranges:
            # RFC2616 14.35, ranges are invalid if start >= file_size
            if start >= file_size:
                satisfiable = False
                break
            # RFC2616 14.35, end values should be truncated
            # to file_size -1 if they exceed it
            checked.append((start, min(end, file_size - 1)))
        self._satisfiable_ranges = satisfiable

        if not satisfiable:
            # RFC2616 14.16 and 14.35 says that when a server
            # encounters unsatisfiable range specifiers, it
            # SHOULD return a 416.
            # FIXME: We SHOULD send a Content-Range header too,
            # but the implementation of send_error does not
            # allows that. So far.
            self.send_error(416, "Requested range not satisfiable")
            return

        if len(checked) == 1:
            (start, end) = checked[0]
            self.get_single_range(file, file_size, start, end)
        else:
            self.get_multiple_ranges(file, file_size, checked)

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            # NOTE(review): the statement applying the trick was lost in
            # extraction; the '-proxied' suffix is restored from upstream
            # bzrlib -- confirm.
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Delegate the translation to SimpleHTTPRequestHandler."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            path = os.getcwdu()
            for word in words:
                # Keep only the last component so the result cannot
                # escape the current directory.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
267
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTP server that keeps a reference to the test case server."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: ``(host, port)`` passed to HTTPServer.
        :param RequestHandlerClass: Handler class instantiated per request.
        :param test_case_server: Object stored as ``self.test_case_server``.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests.
        self.test_case_server = test_case_server

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be in a blocking read, we shutdown the socket
        before closing it.  The socket is closed even if the shutdown
        fails.
        """
        try:
            try:
                self.socket.shutdown(socket.SHUT_RDWR)
            except socket.error:
                e = sys.exc_info()[1]
                # A socket that never saw a connection reports "not
                # connected" on some platforms (10057 is WSAENOTCONN on
                # win32); that is harmless when tearing down.
                if not e.args or e.args[0] not in (errno.ENOTCONN, 10057):
                    raise
        finally:
            BaseHTTPServer.HTTPServer.server_close(self)
289
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The server runs in a daemon thread started by setUp() and serves the
    directory that was current when setUp() was called.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Record the handler class; nothing is started until setUp()."""
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # 0 lets the system pick a free port; the real one is read back
        # in _get_httpd().
        self.port = 0
        self._httpd = None
        # Allows tests to verify number of GET requests issued
        self.GET_request_nb = 0

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            host, self.port = self._httpd.socket.getsockname()
        return self._httpd

    def _http_start(self):
        """Thread body: publish the base url, then serve until stopped."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Tell setUp() that the base url is now available.
        self._http_starting.release()

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which the local ``path`` is served.

        :param path: A relative path, or an absolute path below the
            directory served by this server.
        :raises BadWebserverPath: If ``path`` is absolute and not below
            the served directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            local_parts = self._local_path_parts
            if path_parts[:len(local_parts)] != local_parts:
                # The served directory is self._home_dir; no test_dir
                # attribute is ever set on this class.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(local_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Create the log before the serving thread exists so that log()
        # can never run without it.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        # Clear the flag first so the serving loop exits as soon as closing
        # the socket wakes it up, instead of retrying on a closed socket.
        self._http_running = False
        self._httpd.server_close()
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
393
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
404
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'