/brz/remove-bazaar : contents of bzrlib/tests/HttpServer.py at revision 2182.3.4

: (revision 2182.3.4)

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import posixpath
import random
import re
import sys
import threading
import time
import urllib
import urlparse

from bzrlib.transport import Server


class WebserverNotAvailable(Exception):
    """Exception signalling that a web server is not available.

    It adds nothing to ``Exception``; it only provides a distinct type
    that callers can raise and catch.
    """


class BadWebserverPath(ValueError):
    """A path that does not live under the directory being served.

    Instances are expected to be built with two arguments, the offending
    path and the directory it should have been in; both are interpolated
    into the message returned by ``str()``.
    """

    def __str__(self):
        # self.args is the (path, directory) tuple given to the constructor.
        template = 'path %s is not in %s'
        return template % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test HTTP server.

    Compared to SimpleHTTPRequestHandler it:

    - forwards log lines to ``self.server.test_case.log`` instead of
      writing them to stderr;
    - retries reading the request line when the socket reports
      EAGAIN/EWOULDBLOCK;
    - answers GET requests carrying a ``Range`` header with a 206
      response (single part or multipart/byteranges).
    """

    def log_message(self, format, *args):
        """Forward an access-log style line to the owning test case.

        :param format: %-style format string of the message.
        :param args: Values interpolated into ``format``.
        """
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Read one request line and dispatch it to the ``do_<COMMAND>`` method.

        Reading is attempted at most 10 times, sleeping 10ms after each
        EAGAIN/EWOULDBLOCK.  If every attempt fails that way the
        connection is closed, rather than going on with a request line
        that was never read (or that belongs to the previous request).
        Any other socket error is propagated.
        """
        for attempt in xrange(1, 11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error, e:
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # Deliberately not logged: some tests look at the log
                    # of the server and expect to see no errors.
                    # -- mbp 20051021.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # All the attempts hit EAGAIN: there is nothing to parse.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            # The peer closed the connection.
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    # 'start-end' byte range specifier
    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    # '-length' suffix specifier (the last 'length' bytes of the file)
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the value of a Range header.

        :param ranges_header: The header value; it must start with
            'bytes='.
        :return: A ``(tail, ranges)`` tuple.  ``ranges`` is the list of
            ``(start, end)`` integer pairs for the 'start-end'
            specifiers, in header order.  ``tail`` is the length given
            by the last '-length' specifier, or 0 when there is none.
            Specifiers matching neither form are silently skipped.
        """
        tail = 0
        ranges = []
        assert ranges_header.startswith('bytes=')
        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``, to the client."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response made of the single range ``start``-``end``.

        :param file: The opened file being served.
        :param file_size: Total size of the file, reported in Content-Range.
        :param start: Offset of the first byte sent.
        :param end: Offset of the last byte sent (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range.

        :param file: The opened file being served.
        :param file_size: Total size of the file, reported in each
            Content-Range header.
        :param ranges: List of ``(start, end)`` pairs, ``end`` inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            # NOTE(review): a boundary line is also written after every
            # part and the body is never terminated by '--boundary--'.
            # This is not the usual multipart framing but it is kept as
            # is since clients under test may rely on it -- confirm
            # before changing.
            self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Requests without a Range header, or targeting a directory, are
        handled by SimpleHTTPRequestHandler.  Otherwise the requested
        ranges are served with a 206 response, a missing file produces a
        404 and unusable ranges produce a 416.
        """

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        # From here on, the file is closed whatever happens.
        try:
            file_size = os.fstat(file.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges: the last 'tail' bytes are
            # [file_size - tail, file_size - 1] since end offsets are
            # inclusive. A tail longer than the file means the whole file.
            if tail != 0:
                ranges.append((max(0, file_size - tail), file_size - 1))

            ranges_valid = len(ranges) > 0
            for (start, end) in ranges:
                # start > end would otherwise produce a negative length
                if start > end or start >= file_size or end >= file_size:
                    ranges_valid = False
                    break
            if not ranges_valid:
                # RFC2616 14.35 says that invalid Range headers must
                # be ignored. If they are, the whole file should be
                # returned as though no Range header was present. If
                # they aren't, the server should return a 416 error.
                # FIXME: per 14.35, ranges are only invalid if start > end.
                # end values should be truncated to file_size -1 if they
                # exceed it. only start values >= file_size should produce
                # a 416.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, ranges)
        finally:
            file.close()

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: The path part of the requested url.
            :return: A unicode path below the current working directory.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word, so that a
                # drive or a directory hidden in it is dropped.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir): continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    def __init__(self, server_address, RequestHandlerClass, test_case):
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                                RequestHandlerClass)
        self.test_case = test_case


class HttpServer(Server):
    """A test server for http transports.

    The http server runs in a daemon thread started by setUp() and
    stopped by tearDown().  It listens on a port of localhost chosen by
    the system and serves the directory that was current when setUp()
    was called.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server; nothing is started until setUp().

        :param request_handler: Class used to handle each request.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Build the http server, bound to a free port of localhost."""
        return TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)

    def _http_start(self):
        """Body of the server thread: serve until ``_http_running`` is false."""
        try:
            httpd = self._get_httpd()
            host, port = httpd.socket.getsockname()
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
        finally:
            # Always wake up _get_remote_url(), even when the server
            # could not be created, otherwise it would wait forever.
            self._http_starting.release()
        # The timeout lets the loop notice that _http_running changed
        httpd.socket.settimeout(0.1)

        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Do not leak the listening socket
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which the local ``path`` is served.

        :param path: A local path. An absolute one must be below the
            directory served, a relative one is used as is.
        :return: The base url of the server followed by the
            '/'-separated path.
        :raises BadWebserverPath: If ``path`` is absolute but not below
            the directory served.
        :raises WebserverNotAvailable: If the server thread failed
            before it could publish its base url.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has set (or failed to set) the
        # base url; the lock is held from setUp() until then.
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('the http server failed to start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Created before the thread starts so that log() can always be
        # called from it.
        self.logs = []
        # Held until the server thread knows its url
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove http_proxy from the environment for as long as the
        # server is up; tearDown() restores it.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """HttpServer whose urls use the 'http+urllib' scheme.

    Meant for tests: connecting through such urls should select the
    urllib based client implementation where possible.
    """

    # scheme put in the urls handed out by this server
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """HttpServer whose urls use the 'http+pycurl' scheme.

    Meant for tests: connecting through such urls should select the
    pycurl based client implementation where possible.
    """

    # pycurl availability is not checked here: this server is only
    # expected to be requested when pycurl is present.

    # scheme put in the urls handed out by this server
    _url_protocol = 'http+pycurl'

2004.1.40 by v.ladeuil+lp at free Fix the race condition again and correct some small typos to be in	1	# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	import BaseHTTPServer
	18	import errno
	19	import os
	20	from SimpleHTTPServer import SimpleHTTPRequestHandler
	21	import socket
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	22	import posixpath
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	23	import random
	24	import re
	25	import sys
	26	import threading
	27	import time
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	28	import urllib
	29	import urlparse
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	30
	31	from bzrlib.transport import Server
	32
	33
	34	class WebserverNotAvailable(Exception):
	35	pass
	36
	37
	38	class BadWebserverPath(ValueError):
	39	def __str__(self):
	40	return 'path %s is not in %s' % self.args
	41
	42
	43	class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
	44
	45	def log_message(self, format, *args):
	46	self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
	47	self.address_string(),
	48	self.log_date_time_string(),
	49	format % args,
	50	self.headers.get('referer', '-'),
	51	self.headers.get('user-agent', '-'))
	52
	53	def handle_one_request(self):
	54	"""Handle a single HTTP request.
	55
	56	You normally don't need to override this method; see the class
	57	__doc__ string for information on how to handle specific HTTP
	58	commands such as GET and POST.
	59
	60	"""
	61	for i in xrange(1,11): # Don't try more than 10 times
	62	try:
	63	self.raw_requestline = self.rfile.readline()
	64	except socket.error, e:
	65	if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
	66	# omitted for now because some tests look at the log of
	67	# the server and expect to see no errors. see recent
	68	# email thread. -- mbp 20051021.
	69	## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
	70	time.sleep(0.01)
	71	continue
	72	raise
	73	else:
	74	break
	75	if not self.raw_requestline:
	76	self.close_connection = 1
	77	return
	78	if not self.parse_request(): # An error code has been sent, just exit
	79	return
	80	mname = 'do_' + self.command
	81	if getattr(self, mname, None) is None:
	82	self.send_error(501, "Unsupported method (%r)" % self.command)
	83	return
	84	method = getattr(self, mname)
	85	method()
	86
	87	_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
	88	_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
	89
	90	def parse_ranges(self, ranges_header):
	91	"""Parse the range header value and returns ranges and tail"""
	92	tail = 0
	93	ranges = []
94	assert ranges_header.startswith('bytes=')
95	ranges_header = ranges_header[len('bytes='):]
96	for range_str in ranges_header.split(','):
97	range_match = self._range_regexp.match(range_str)
98	if range_match is not None:
99	ranges.append((int(range_match.group('start')),
100	int(range_match.group('end'))))
101	else:
102	tail_match = self._tail_regexp.match(range_str)
103	if tail_match is not None:
104	tail = int(tail_match.group('tail'))
105	return tail, ranges
106
107	def send_range_content(self, file, start, length):
108	file.seek(start)
109	self.wfile.write(file.read(length))
110
111	def get_single_range(self, file, file_size, start, end):
112	self.send_response(206)
113	length = end - start + 1
114	self.send_header('Accept-Ranges', 'bytes')
115	self.send_header("Content-Length", "%d" % length)
116
117	self.send_header("Content-Type", 'application/octet-stream')
118	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
119	end,
120	file_size))
121	self.end_headers()
122	self.send_range_content(file, start, length)
123
124	def get_multiple_ranges(self, file, file_size, ranges):
125	self.send_response(206)
126	self.send_header('Accept-Ranges', 'bytes')
127	boundary = "%d" % random.randint(0,0x7FFFFFFF)
128	self.send_header("Content-Type",
129	"multipart/byteranges; boundary=%s" % boundary)
130	self.end_headers()
131	for (start, end) in ranges:
132	self.wfile.write("--%s\r\n" % boundary)
133	self.send_header("Content-type", 'application/octet-stream')
134	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
135	end,
136	file_size))
137	self.end_headers()
138	self.send_range_content(file, start, end - start + 1)
139	self.wfile.write("--%s\r\n" % boundary)
140	pass
141
142	def do_GET(self):
143	"""Serve a GET request.
144
145	Handles the Range header.
146	"""
147
148	path = self.translate_path(self.path)
149	ranges_header_value = self.headers.get('Range')
150	if ranges_header_value is None or os.path.isdir(path):
151	# Let the mother class handle most cases
152	return SimpleHTTPRequestHandler.do_GET(self)
153
154	try:
155	# Always read in binary mode. Opening files in text
156	# mode may cause newline translations, making the
157	# actual size of the content transmitted less than
158	# the content-length!
159	file = open(path, 'rb')
160	except IOError:
161	self.send_error(404, "File not found")
2000.3.9 by v.ladeuil+lp at free The tests that would have help avoid bug #73948 and all that mess :)	162	return
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	163
	164	file_size = os.fstat(file.fileno())[6]
	165	tail, ranges = self.parse_ranges(ranges_header_value)
	166	# Normalize tail into ranges
	167	if tail != 0:
	168	ranges.append((file_size - tail, file_size))
	169
	170	ranges_valid = True
	171	if len(ranges) == 0:
	172	ranges_valid = False
	173	else:
	174	for (start, end) in ranges:
	175	if start >= file_size or end >= file_size:
	176	ranges_valid = False
	177	break
	178	if not ranges_valid:
2180.1.2 by Aaron Bentley Grammar fixes	179	# RFC2616 14.35 says that invalid Range headers must
2172.3.1 by v.ladeuil+lp at free Merge a recent bzr.dev (2172) and takes John's remarks into account.	180	# be ignored. If they are, the whole file should be
	181	# returned as though no Range header was present. If
	182	# they aren't, the server should return a 416 error.
2180.1.2 by Aaron Bentley Grammar fixes	183	# FIXME: per 14.35, ranges are only invalid if start > end.
	184	# end values should be truncated to file_size -1 if they exceed it.
	185	# only start values >= file_size should produce a 416.
2000.3.9 by v.ladeuil+lp at free The tests that would have help avoid bug #73948 and all that mess :)	186	file.close()
	187	self.send_error(416, "Requested range not satisfiable")
	188	return
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	189
	190	if len(ranges) == 1:
	191	(start, end) = ranges[0]
	192	self.get_single_range(file, file_size, start, end)
	193	else:
	194	self.get_multiple_ranges(file, file_size, ranges)
	195	file.close()
	196
	197	if sys.platform == 'win32':
	198	# On win32 you cannot access non-ascii filenames without
	199	# decoding them into unicode first.
	200	# However, under Linux, you can access bytestream paths
	201	# without any problems. If this function was always active
	202	# it would probably break tests when LANG=C was set
	203	def translate_path(self, path):
	204	"""Translate a /-separated PATH to the local filename syntax.
	205
	206	For bzr, all url paths are considered to be utf8 paths.
	207	On Linux, you can access these paths directly over the bytestream
	208	request, but on win32, you must decode them, and access them
	209	as Unicode files.
	210	"""
	211	# abandon query parameters
	212	path = urlparse.urlparse(path)[2]
	213	path = posixpath.normpath(urllib.unquote(path))
	214	path = path.decode('utf-8')
	215	words = path.split('/')
	216	words = filter(None, words)
	217	path = os.getcwdu()
	218	for word in words:
	219	drive, word = os.path.splitdrive(word)
	220	head, word = os.path.split(word)
	221	if word in (os.curdir, os.pardir): continue
	222	path = os.path.join(path, word)
	223	return path
	224
	225
	226	class TestingHTTPServer(BaseHTTPServer.HTTPServer):
	227	def __init__(self, server_address, RequestHandlerClass, test_case):
	228	BaseHTTPServer.HTTPServer.__init__(self, server_address,
	229	RequestHandlerClass)
	230	self.test_case = test_case
	231
	232
	233	class HttpServer(Server):
	234	"""A test server for http transports.
	235
	236	Subclasses can provide a specific request handler.
	237	"""
	238
	239	# used to form the url that connects to this server
	240	_url_protocol = 'http'
	241
	242	# Subclasses can provide a specific request handler
	243	def __init__(self, request_handler=TestingHTTPRequestHandler):
	244	Server.__init__(self)
	245	self.request_handler = request_handler
	246
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	247	def _get_httpd(self):
	248	return TestingHTTPServer(('localhost', 0),
	249	self.request_handler,
	250	self)
	251
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	252	def _http_start(self):
	253	httpd = None
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	254	httpd = self._get_httpd()
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	255	host, port = httpd.socket.getsockname()
	256	self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
	257	self._http_starting.release()
	258	httpd.socket.settimeout(0.1)
	259
	260	while self._http_running:
	261	try:
	262	httpd.handle_request()
	263	except socket.timeout:
	264	pass
	265
	266	def _get_remote_url(self, path):
	267	path_parts = path.split(os.path.sep)
	268	if os.path.isabs(path):
	269	if path_parts[:len(self._local_path_parts)] != \
	270	self._local_path_parts:
	271	raise BadWebserverPath(path, self.test_dir)
	272	remote_path = '/'.join(path_parts[len(self._local_path_parts):])
	273	else:
	274	remote_path = '/'.join(path_parts)
	275
	276	self._http_starting.acquire()
	277	self._http_starting.release()
	278	return self._http_base_url + remote_path
	279
	280	def log(self, format, *args):
	281	"""Capture Server log output."""
	282	self.logs.append(format % args)
	283
	284	def setUp(self):
	285	"""See bzrlib.transport.Server.setUp."""
	286	self._home_dir = os.getcwdu()
	287	self._local_path_parts = self._home_dir.split(os.path.sep)
	288	self._http_starting = threading.Lock()
	289	self._http_starting.acquire()
	290	self._http_running = True
	291	self._http_base_url = None
	292	self._http_thread = threading.Thread(target=self._http_start)
	293	self._http_thread.setDaemon(True)
	294	self._http_thread.start()
	295	self._http_proxy = os.environ.get("http_proxy")
	296	if self._http_proxy is not None:
	297	del os.environ["http_proxy"]
	298	self.logs = []
	299
	300	def tearDown(self):
	301	"""See bzrlib.transport.Server.tearDown."""
	302	self._http_running = False
	303	self._http_thread.join()
	304	if self._http_proxy is not None:
	305	import os
	306	os.environ["http_proxy"] = self._http_proxy
	307
	308	def get_url(self):
	309	"""See bzrlib.transport.Server.get_url."""
	310	return self._get_remote_url(self._home_dir)
	311
	312	def get_bogus_url(self):
	313	"""See bzrlib.transport.Server.get_bogus_url."""
	314	# this is chosen to try to prevent trouble with proxies, weird dns,
	315	# etc
	316	return 'http://127.0.0.1:1/'
	317
	318
319	class HttpServer_urllib(HttpServer):
320	"""Subclass of HttpServer that gives http+urllib urls.
321
322	This is for use in testing: connections to this server will always go
323	through urllib where possible.
324	"""
325
326	# urls returned by this server should require the urllib client impl
327	_url_protocol = 'http+urllib'
328
329
330	class HttpServer_PyCurl(HttpServer):
331	"""Subclass of HttpServer that gives http+pycurl urls.
332
333	This is for use in testing: connections to this server will always go
334	through pycurl where possible.
335	"""
336
337	# We don't care about checking the pycurl availability as
338	# this server will be required only when pycurl is present
339
340	# urls returned by this server should require the pycurl client impl
341	_url_protocol = 'http+pycurl'