/brz/remove-bazaar : contents of bzrlib/tests/HttpServer.py at revision 2000.3.9

: (revision 2000.3.9)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import posixpath
import random
import re
import sys
import threading
import time
import urllib
import urlparse

from bzrlib.transport import Server


class WebserverNotAvailable(Exception):
    """Exception type signalling that a web server is not available."""


class BadWebserverPath(ValueError):
    """A local path that does not live under the directory being served.

    Instances are expected to be built with two arguments, the offending
    path and the directory it was checked against; both are interpolated
    into the message.
    """

    def __str__(self):
        message_template = 'path %s is not in %s'
        return message_template % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test HTTP server.

    On top of SimpleHTTPRequestHandler it:

    - sends log lines to ``self.server.test_case.log`` ;
    - retries reading the request line when the socket reports
      EAGAIN/EWOULDBLOCK;
    - honours the ``Range`` header of GET requests on regular files
      (``start-end`` ranges and ``-tail`` suffix ranges only).
    """

    def log_message(self, format, *args):
        """Forward a log line to the object owning the server.

        :param format: %-style format of the message.
        :param args: values interpolated into ``format``.
        """
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        Reading the request line is retried, up to 10 times with a 10ms
        pause, when the socket raises EAGAIN or EWOULDBLOCK; any other
        socket error is propagated.  If every attempt fails the connection
        is closed without sending an answer.  The request is then
        dispatched to the ``do_<COMMAND>`` method, or answered with a 501
        error when no such method exists.
        """
        for attempt in xrange(1,11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error, e:
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # Deliberately not logged: some tests look at the log
                    # of the server and expect to see no errors.  see
                    # recent email thread. -- mbp 20051021.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # Nothing could be read: do not go on with an unset (or
            # stale) request line, just give up on this connection.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        method = getattr(self, 'do_' + self.command, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail

        :param ranges_header: the raw value of the Range header, for
            example ``'bytes=0-9,20-29,-5'``.
        :return: a ``(tail, ranges)`` tuple.  ``ranges`` is the list of
            ``(start, end)`` integer pairs found, in header order.  ``tail``
            is the length of the last ``-N`` suffix range found, 0 if there
            was none.  Range specifiers matching neither form (including
            open-ended ``N-`` ones) are ignored.  A header that does not
            use the ``bytes`` unit yields ``(0, [])``.
        """
        tail = 0
        ranges = []
        unit_prefix = 'bytes='
        if not ranges_header.startswith(unit_prefix):
            # This is client supplied data: report "no usable range"
            # rather than relying on an assert.
            return tail, ranges
        for range_str in ranges_header[len(unit_prefix):].split(','):
            # Tolerate optional white space around the commas
            range_str = range_str.strip()
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Answer 206 with the bytes ``start`` to ``end`` (inclusive).

        :param file: the opened file to read from.
        :param file_size: total size, reported in Content-Range.
        :param start: offset of the first byte sent.
        :param end: offset of the last byte sent.
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Answer 206 with a multipart/byteranges body.

        :param file: the opened file to read from.
        :param file_size: total size, reported in each Content-Range.
        :param ranges: list of ``(start, end)`` inclusive offsets.

        Each part is written as a boundary line, its own headers, the
        bytes of the range and a boundary line again.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        boundary_line = "--%s\r\n" % boundary
        # NOTE(review): a boundary line is emitted both before and after
        # every part and no closing '--boundary--' is sent.  This looks
        # different from what RFC 2046 describes, but the format is kept
        # byte for byte since clients may rely on it -- confirm before
        # changing.
        for (start, end) in ranges:
            self.wfile.write(boundary_line)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            self.wfile.write(boundary_line)

    def _ranges_are_satisfiable(self, ranges, file_size):
        """Tell whether every range designates bytes inside the file."""
        if not ranges:
            return False
        for (start, end) in ranges:
            if start > end or start >= file_size or end >= file_size:
                return False
        return True

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.

        Requests without a Range header, or targeting a directory, are
        served by SimpleHTTPRequestHandler.  Otherwise the answer is 404
        when the file cannot be opened, 416 when no usable range was given
        or when a range falls outside the file, and 206 with the requested
        bytes in the remaining cases.
        """

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        try:
            file_size = os.fstat(f.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges: the last 'tail' bytes of the
            # file, i.e. up to offset file_size - 1 (ends are inclusive),
            # or the whole file when it is shorter than 'tail'.
            if tail != 0:
                ranges.append((max(0, file_size - tail), file_size - 1))

            if not self._ranges_are_satisfiable(ranges, file_size):
                # RFC2616 14-16 says that invalid Range headers could
                # be ignored and in that case, the whole file should
                # be returned as if no Range header was present. Or
                # that the server should returns a 416 error.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(f, file_size, start, end)
            else:
                self.get_multiple_ranges(f, file_size, ranges)
        finally:
            # Whatever happened above, do not leak the file
            f.close()

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: the path part of the requested url.
            :return: a unicode path below the current working directory.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word and drop
                # '.' and '..' entries
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir): continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the object it was created for.

    The extra ``test_case`` attribute is what the request handlers reach
    through ``self.server.test_case``.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        # Let the stock server set itself up first, then record the owner.
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case


class HttpServer(Server):
    """A test server for http transports.

    The current working directory at setUp() time is served, from a
    daemon thread, on a port chosen by the system.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server.

        :param request_handler: the class handling the requests.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Build the http server, bound to any free port of localhost."""
        return TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)

    def _http_start(self):
        """Body of the server thread.

        Creates the server, publishes its base url, then handles requests
        until tearDown() clears ``_http_running``.
        """
        httpd = None
        try:
            try:
                httpd = self._get_httpd()
                port = httpd.socket.getsockname()[1]
                self._http_base_url = '%s://localhost:%s/' % (
                    self._url_protocol, port)
            finally:
                # Always wake up the callers waiting in _get_remote_url,
                # even if the server could not be created, otherwise
                # they would block forever.
                self._http_starting.release()
            # A short timeout lets the loop notice _http_running changes
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            if httpd is not None:
                # Do not leak the listening socket
                httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url giving access to a local path.

        :param path: a path relative to the served directory, or an
            absolute path below it.
        :raises BadWebserverPath: for an absolute path which is not below
            the served directory.
        :raises WebserverNotAvailable: if the server thread failed to
            start.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_length = len(self._local_path_parts)
            if path_parts[:prefix_length] != self._local_path_parts:
                # _home_dir is the directory _local_path_parts comes from
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_length:])
        else:
            remote_path = '/'.join(path_parts)

        # Wait for the server thread to have published its base url
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('the http server failed to start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Everything the server thread may touch exists before it starts
        self.logs = []
        # The http_proxy environment variable is removed while the
        # server is up; tearDown() puts it back
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]
        # Held until the server thread has published _http_base_url
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        # Ask the serving loop to stop and wait for it
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """HttpServer variant whose urls use the http+urllib scheme.

    Meant for tests: clients given such urls are expected to go through
    the urllib implementation where possible.
    """

    # scheme put in front of the urls built by this server, selecting the
    # urllib client implementation
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """HttpServer variant whose urls use the http+pycurl scheme.

    Meant for tests: clients given such urls are expected to go through
    the pycurl implementation where possible.
    """

    # No check is made here that pycurl can be used: this server is only
    # supposed to be requested when pycurl is present.

    # scheme put in front of the urls built by this server, selecting the
    # pycurl client implementation
    _url_protocol = 'http+pycurl'

2004.1.40 by v.ladeuil+lp at free Fix the race condition again and correct some small typos to be in	1	# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	import BaseHTTPServer
	18	import errno
	19	import os
	20	from SimpleHTTPServer import SimpleHTTPRequestHandler
	21	import socket
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	22	import posixpath
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	23	import random
	24	import re
	25	import sys
	26	import threading
	27	import time
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	28	import urllib
	29	import urlparse
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	30
	31	from bzrlib.transport import Server
	32
	33
	34	class WebserverNotAvailable(Exception):
	35	pass
	36
	37
	38	class BadWebserverPath(ValueError):
	39	def __str__(self):
	40	return 'path %s is not in %s' % self.args
	41
	42
	43	class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
	44
	45	def log_message(self, format, *args):
	46	self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
	47	self.address_string(),
	48	self.log_date_time_string(),
	49	format % args,
	50	self.headers.get('referer', '-'),
	51	self.headers.get('user-agent', '-'))
	52
	53	def handle_one_request(self):
	54	"""Handle a single HTTP request.
	55
	56	You normally don't need to override this method; see the class
	57	__doc__ string for information on how to handle specific HTTP
	58	commands such as GET and POST.
	59
	60	"""
	61	for i in xrange(1,11): # Don't try more than 10 times
	62	try:
	63	self.raw_requestline = self.rfile.readline()
	64	except socket.error, e:
	65	if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
	66	# omitted for now because some tests look at the log of
	67	# the server and expect to see no errors. see recent
	68	# email thread. -- mbp 20051021.
	69	## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
	70	time.sleep(0.01)
	71	continue
	72	raise
	73	else:
	74	break
	75	if not self.raw_requestline:
	76	self.close_connection = 1
	77	return
	78	if not self.parse_request(): # An error code has been sent, just exit
	79	return
	80	mname = 'do_' + self.command
	81	if getattr(self, mname, None) is None:
	82	self.send_error(501, "Unsupported method (%r)" % self.command)
	83	return
	84	method = getattr(self, mname)
	85	method()
	86
	87	_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
	88	_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
	89
	90	def parse_ranges(self, ranges_header):
	91	"""Parse the range header value and returns ranges and tail"""
	92	tail = 0
	93	ranges = []
94	assert ranges_header.startswith('bytes=')
95	ranges_header = ranges_header[len('bytes='):]
96	for range_str in ranges_header.split(','):
97	range_match = self._range_regexp.match(range_str)
98	if range_match is not None:
99	ranges.append((int(range_match.group('start')),
100	int(range_match.group('end'))))
101	else:
102	tail_match = self._tail_regexp.match(range_str)
103	if tail_match is not None:
104	tail = int(tail_match.group('tail'))
105	return tail, ranges
106
107	def send_range_content(self, file, start, length):
108	file.seek(start)
109	self.wfile.write(file.read(length))
110
111	def get_single_range(self, file, file_size, start, end):
112	self.send_response(206)
113	length = end - start + 1
114	self.send_header('Accept-Ranges', 'bytes')
115	self.send_header("Content-Length", "%d" % length)
116
117	self.send_header("Content-Type", 'application/octet-stream')
118	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
119	end,
120	file_size))
121	self.end_headers()
122	self.send_range_content(file, start, length)
123
124	def get_multiple_ranges(self, file, file_size, ranges):
125	self.send_response(206)
126	self.send_header('Accept-Ranges', 'bytes')
127	boundary = "%d" % random.randint(0,0x7FFFFFFF)
128	self.send_header("Content-Type",
129	"multipart/byteranges; boundary=%s" % boundary)
130	self.end_headers()
131	for (start, end) in ranges:
132	self.wfile.write("--%s\r\n" % boundary)
133	self.send_header("Content-type", 'application/octet-stream')
134	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
135	end,
136	file_size))
137	self.end_headers()
138	self.send_range_content(file, start, end - start + 1)
139	self.wfile.write("--%s\r\n" % boundary)
140	pass
141
142	def do_GET(self):
143	"""Serve a GET request.
144
145	Handles the Range header.
146	"""
147
148	path = self.translate_path(self.path)
149	ranges_header_value = self.headers.get('Range')
150	if ranges_header_value is None or os.path.isdir(path):
151	# Let the mother class handle most cases
152	return SimpleHTTPRequestHandler.do_GET(self)
153
154	try:
155	# Always read in binary mode. Opening files in text
156	# mode may cause newline translations, making the
157	# actual size of the content transmitted less than
158	# the content-length!
159	file = open(path, 'rb')
160	except IOError:
161	self.send_error(404, "File not found")
2000.3.9 by v.ladeuil+lp at free The tests that would have help avoid bug #73948 and all that mess :)	162	return
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	163
	164	file_size = os.fstat(file.fileno())[6]
	165	tail, ranges = self.parse_ranges(ranges_header_value)
	166	# Normalize tail into ranges
	167	if tail != 0:
	168	ranges.append((file_size - tail, file_size))
	169
	170	ranges_valid = True
	171	if len(ranges) == 0:
	172	ranges_valid = False
	173	else:
	174	for (start, end) in ranges:
	175	if start >= file_size or end >= file_size:
	176	ranges_valid = False
	177	break
	178	if not ranges_valid:
2000.3.9 by v.ladeuil+lp at free The tests that would have help avoid bug #73948 and all that mess :)	179	# RFC2616 14-16 says that invalid Range headers could
	180	# be ignored and in that case, the whole file should
	181	# be returned as if no Range header was present. Or
	182	# that the server should returns a 416 error.
	183	file.close()
	184	self.send_error(416, "Requested range not satisfiable")
	185	return
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	186
	187	if len(ranges) == 1:
	188	(start, end) = ranges[0]
	189	self.get_single_range(file, file_size, start, end)
	190	else:
	191	self.get_multiple_ranges(file, file_size, ranges)
	192	file.close()
	193
	194	if sys.platform == 'win32':
	195	# On win32 you cannot access non-ascii filenames without
	196	# decoding them into unicode first.
	197	# However, under Linux, you can access bytestream paths
	198	# without any problems. If this function was always active
	199	# it would probably break tests when LANG=C was set
	200	def translate_path(self, path):
	201	"""Translate a /-separated PATH to the local filename syntax.
	202
	203	For bzr, all url paths are considered to be utf8 paths.
	204	On Linux, you can access these paths directly over the bytestream
	205	request, but on win32, you must decode them, and access them
	206	as Unicode files.
	207	"""
	208	# abandon query parameters
	209	path = urlparse.urlparse(path)[2]
	210	path = posixpath.normpath(urllib.unquote(path))
	211	path = path.decode('utf-8')
	212	words = path.split('/')
	213	words = filter(None, words)
	214	path = os.getcwdu()
	215	for word in words:
	216	drive, word = os.path.splitdrive(word)
	217	head, word = os.path.split(word)
	218	if word in (os.curdir, os.pardir): continue
	219	path = os.path.join(path, word)
	220	return path
	221
	222
	223	class TestingHTTPServer(BaseHTTPServer.HTTPServer):
	224	def __init__(self, server_address, RequestHandlerClass, test_case):
	225	BaseHTTPServer.HTTPServer.__init__(self, server_address,
	226	RequestHandlerClass)
	227	self.test_case = test_case
	228
	229
	230	class HttpServer(Server):
	231	"""A test server for http transports.
	232
	233	Subclasses can provide a specific request handler.
	234	"""
	235
	236	# used to form the url that connects to this server
	237	_url_protocol = 'http'
	238
	239	# Subclasses can provide a specific request handler
	240	def __init__(self, request_handler=TestingHTTPRequestHandler):
	241	Server.__init__(self)
	242	self.request_handler = request_handler
	243
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	244	def _get_httpd(self):
	245	return TestingHTTPServer(('localhost', 0),
	246	self.request_handler,
	247	self)
	248
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	249	def _http_start(self):
	250	httpd = None
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	251	httpd = self._get_httpd()
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	252	host, port = httpd.socket.getsockname()
	253	self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
	254	self._http_starting.release()
	255	httpd.socket.settimeout(0.1)
	256
	257	while self._http_running:
	258	try:
	259	httpd.handle_request()
	260	except socket.timeout:
	261	pass
	262
	263	def _get_remote_url(self, path):
	264	path_parts = path.split(os.path.sep)
	265	if os.path.isabs(path):
	266	if path_parts[:len(self._local_path_parts)] != \
	267	self._local_path_parts:
	268	raise BadWebserverPath(path, self.test_dir)
	269	remote_path = '/'.join(path_parts[len(self._local_path_parts):])
	270	else:
	271	remote_path = '/'.join(path_parts)
	272
	273	self._http_starting.acquire()
	274	self._http_starting.release()
	275	return self._http_base_url + remote_path
	276
	277	def log(self, format, *args):
	278	"""Capture Server log output."""
	279	self.logs.append(format % args)
	280
	281	def setUp(self):
	282	"""See bzrlib.transport.Server.setUp."""
	283	self._home_dir = os.getcwdu()
	284	self._local_path_parts = self._home_dir.split(os.path.sep)
	285	self._http_starting = threading.Lock()
	286	self._http_starting.acquire()
	287	self._http_running = True
	288	self._http_base_url = None
	289	self._http_thread = threading.Thread(target=self._http_start)
	290	self._http_thread.setDaemon(True)
	291	self._http_thread.start()
	292	self._http_proxy = os.environ.get("http_proxy")
	293	if self._http_proxy is not None:
	294	del os.environ["http_proxy"]
	295	self.logs = []
	296
	297	def tearDown(self):
	298	"""See bzrlib.transport.Server.tearDown."""
	299	self._http_running = False
	300	self._http_thread.join()
	301	if self._http_proxy is not None:
	302	import os
	303	os.environ["http_proxy"] = self._http_proxy
	304
	305	def get_url(self):
	306	"""See bzrlib.transport.Server.get_url."""
	307	return self._get_remote_url(self._home_dir)
	308
	309	def get_bogus_url(self):
	310	"""See bzrlib.transport.Server.get_bogus_url."""
	311	# this is chosen to try to prevent trouble with proxies, weird dns,
	312	# etc
	313	return 'http://127.0.0.1:1/'
	314
	315
316	class HttpServer_urllib(HttpServer):
317	"""Subclass of HttpServer that gives http+urllib urls.
318
319	This is for use in testing: connections to this server will always go
320	through urllib where possible.
321	"""
322
323	# urls returned by this server should require the urllib client impl
324	_url_protocol = 'http+urllib'
325
326
327	class HttpServer_PyCurl(HttpServer):
328	"""Subclass of HttpServer that gives http+pycurl urls.
329
330	This is for use in testing: connections to this server will always go
331	through pycurl where possible.
332	"""
333
334	# We don't care about checking the pycurl availability as
335	# this server will be required only when pycurl is present
336
337	# urls returned by this server should require the pycurl client impl
338	_url_protocol = 'http+pycurl'