/brz/remove-bazaar : contents of bzrlib/tests/HttpServer.py at revision 2004.1.28

: (revision 2004.1.28)

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2005 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import random
import re
import sys
import threading
import time

from bzrlib.transport import Server


class WebserverNotAvailable(Exception):
    """Error type for reporting that no test web server can be used."""


class BadWebserverPath(ValueError):
    """A path that does not live under the directory served over http.

    Instances are expected to be built with two arguments: the offending
    path and the directory it should have been in.
    """

    def __str__(self):
        # self.args supplies both values for the two placeholders.
        template = 'path %s is not in %s'
        return template % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test http server.

    On top of SimpleHTTPRequestHandler this class:

    - forwards log entries to ``self.server.test_case.log``,
    - retries reading the request line when the socket reports
      EAGAIN/EWOULDBLOCK,
    - honours the ``Range`` header on GET requests (single ranges, multiple
      ranges and suffix ranges).
    """

    def log_message(self, format, *args):
        """Forward one access-log entry to the test case's log() method.

        :param format: %-style format string for the message.
        :param args: values interpolated into ``format``.
        """
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        Reads the request line (retrying up to 10 times, 10ms apart, when the
        socket reports EAGAIN/EWOULDBLOCK), parses the request and dispatches
        to the matching ``do_<COMMAND>`` method.  An unknown command is
        answered with a 501 error.  If the request line is empty, or could
        not be read within the allowed attempts, the connection is flagged
        for closing.
        """
        for attempt in xrange(1, 11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error, e:
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # Logging is omitted for now because some tests look at
                    # the log of the server and expect to see no errors.  See
                    # recent email thread. -- mbp 20051021.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # Every attempt hit EAGAIN: nothing was read, so
            # self.raw_requestline is either unset or left over from the
            # previous request on this connection.  Do not parse it.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the value of a Range header.

        :param ranges_header: raw header value, e.g. ``'bytes=0-9,20-29,-5'``.
        :return: a ``(tail, ranges)`` tuple.  ``ranges`` is the list of
            ``(start, end)`` integer pairs found for ``start-end`` specs, in
            header order.  ``tail`` is the length of the last suffix spec
            (``-N``) seen, or 0 if there was none.  Specs matching neither
            form are skipped.  A header that does not use the ``bytes`` unit
            yields ``(0, [])``.
        """
        tail = 0
        ranges = []
        prefix = 'bytes='
        if not ranges_header.startswith(prefix):
            # The header comes from the client: do not rely on assert (it is
            # stripped under -O and would otherwise kill the handler).  An
            # empty result makes the caller serve the whole file.
            return tail, ranges
        for range_str in ranges_header[len(prefix):].split(','):
            # Whitespace is allowed around the separating commas.
            range_str = range_str.strip()
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
        return tail, ranges

    def _checked_ranges(self, tail, ranges, file_size):
        """Turn parse_ranges() output into a validated list of byte ranges.

        :param tail: suffix length requested (0 for none).
        :param ranges: list of ``(start, end)`` pairs, both inclusive.
        :param file_size: size in bytes of the file being served.
        :return: a new list of inclusive ``(start, end)`` pairs, or None when
            there is nothing to serve or any range is unusable (reversed, or
            reaching past the last byte of the file).
        """
        checked = list(ranges)
        if tail != 0:
            # A suffix range covers the last `tail` bytes.  The end offset is
            # inclusive, hence file_size - 1; a suffix longer than the file
            # means the whole file.
            checked.append((max(0, file_size - tail), file_size - 1))
        if not checked:
            return None
        for (start, end) in checked:
            if start > end or start >= file_size or end >= file_size:
                return None
        return checked

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response carrying the single range ``start``-``end``.

        :param file: opened file object to read the content from.
        :param file_size: total size of the file, reported in Content-Range.
        :param start: offset of the first byte to send.
        :param end: offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response for several ranges.

        :param file: opened file object to read the content from.
        :param file_size: total size of the file, reported in Content-Range.
        :param ranges: list of inclusive ``(start, end)`` pairs to send.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        # NOTE(review): a "--boundary" line is written both before and after
        # every part, and no closing "--boundary--" is sent.  The layout is
        # kept unchanged because the clients tested against this server may
        # rely on it -- confirm before aligning it with RFC 2046.
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.  Requests without a Range header, requests
        for directories and requests whose ranges cannot be honoured are
        delegated to SimpleHTTPRequestHandler.do_GET, which serves the whole
        resource.  A file that cannot be opened is answered with a 404.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        # try/finally so the file is closed even if sending fails midway.
        try:
            file_size = os.fstat(f.fileno()).st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            ranges = self._checked_ranges(tail, ranges, file_size)
            if ranges is not None:
                if len(ranges) == 1:
                    (start, end) = ranges[0]
                    self.get_single_range(f, file_size, start, end)
                else:
                    self.get_multiple_ranges(f, file_size, ranges)
                return None
        finally:
            f.close()

        # RFC2616 14-16 says that invalid Range headers
        # should be ignored and in that case, the whole file
        # should be returned as if no Range header was
        # present.  The file is reopened by the following call.
        return SimpleHTTPRequestHandler.do_GET(self)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: the path part of the request url.
            :return: a unicode path rooted at the current working directory.
            """
            # These modules are only needed by this win32-only method and
            # are not imported at module level.
            import posixpath
            import urllib
            import urlparse

            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word so that drive
                # letters or embedded separators cannot escape the cwd.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    def __init__(self, server_address, RequestHandlerClass, test_case):
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                                RequestHandlerClass)
        self.test_case = test_case


class HttpServer(Server):
    """A test server for http transports.

    The server runs in a daemon thread started by setUp() and stopped by
    tearDown().  It listens on an ephemeral localhost port and serves
    requests with ``request_handler``.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object; nothing is started until setUp().

        :param request_handler: request handler class given to the
            underlying TestingHTTPServer.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Build the http server, bound to a free port on localhost.

        This object is passed as the server's ``test_case`` so that the
        request handler logs through self.log().
        """
        return TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)

    def _http_start(self):
        """Body of the server thread: serve requests until told to stop."""
        httpd = None
        try:
            try:
                httpd = self._get_httpd()
                host, port = httpd.socket.getsockname()
                self._http_base_url = '%s://localhost:%s/' % (
                    self._url_protocol, port)
            finally:
                # Always unblock _get_remote_url(), even when the server
                # could not be created; otherwise callers wait forever.
                self._http_starting.release()
            # The short timeout lets the loop notice _http_running changes.
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            if httpd is not None:
                # Release the listening socket once the loop is over.
                httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which the local ``path`` is served.

        :param path: a local path.  Absolute paths must be below the
            directory that was current when setUp() ran; relative paths are
            used as is.
        :raises BadWebserverPath: if an absolute ``path`` is outside of the
            served directory.
        :raises WebserverNotAvailable: if the server thread failed to start.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_length = len(self._local_path_parts)
            if path_parts[:prefix_length] != self._local_path_parts:
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_length:])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published its base url.
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('the test http server did not start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Everything the server thread may touch is set up before it starts.
        self.logs = []
        # Held until the server thread knows its url, see _http_start().
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove http_proxy from the environment while the server is in use;
        # tearDown() puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """HttpServer variant handing out http+urllib urls.

    Meant for tests: clients connecting through these urls will use urllib
    where possible.
    """

    # The scheme of the urls returned by this server requires the urllib
    # client implementation.
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """HttpServer variant handing out http+pycurl urls.

    Meant for tests: clients connecting through these urls will use pycurl
    where possible.
    """

    # pycurl availability is not checked here: this server is only
    # required when pycurl is present.

    # The scheme of the urls returned by this server requires the pycurl
    # client implementation.
    _url_protocol = 'http+pycurl'

2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	1	# Copyright (C) 2005 by Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	import BaseHTTPServer
	18	import errno
	19	import os
	20	from SimpleHTTPServer import SimpleHTTPRequestHandler
	21	import socket
	22	import random
	23	import re
	24	import sys
	25	import threading
	26	import time
	27
	28	from bzrlib.transport import Server
	29
	30
	31	class WebserverNotAvailable(Exception):
	32	pass
	33
	34
	35	class BadWebserverPath(ValueError):
	36	def __str__(self):
	37	return 'path %s is not in %s' % self.args
	38
	39
	40	class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
	41
	42	def log_message(self, format, *args):
	43	self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
	44	self.address_string(),
	45	self.log_date_time_string(),
	46	format % args,
	47	self.headers.get('referer', '-'),
	48	self.headers.get('user-agent', '-'))
	49
	50	def handle_one_request(self):
	51	"""Handle a single HTTP request.
	52
	53	You normally don't need to override this method; see the class
	54	__doc__ string for information on how to handle specific HTTP
	55	commands such as GET and POST.
	56
	57	"""
	58	for i in xrange(1,11): # Don't try more than 10 times
	59	try:
	60	self.raw_requestline = self.rfile.readline()
	61	except socket.error, e:
	62	if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
	63	# omitted for now because some tests look at the log of
	64	# the server and expect to see no errors. see recent
65	# email thread. -- mbp 20051021.
66	## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
67	time.sleep(0.01)
68	continue
69	raise
70	else:
71	break
72	if not self.raw_requestline:
73	self.close_connection = 1
74	return
75	if not self.parse_request(): # An error code has been sent, just exit
76	return
77	mname = 'do_' + self.command
78	if getattr(self, mname, None) is None:
79	self.send_error(501, "Unsupported method (%r)" % self.command)
80	return
81	method = getattr(self, mname)
82	method()
83
84	_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
85	_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
86
87	def parse_ranges(self, ranges_header):
88	"""Parse the range header value and returns ranges and tail"""
89	tail = 0
90	ranges = []
91	assert ranges_header.startswith('bytes=')
92	ranges_header = ranges_header[len('bytes='):]
93	for range_str in ranges_header.split(','):
94	range_match = self._range_regexp.match(range_str)
95	if range_match is not None:
96	ranges.append((int(range_match.group('start')),
97	int(range_match.group('end'))))
98	else:
99	tail_match = self._tail_regexp.match(range_str)
100	if tail_match is not None:
101	tail = int(tail_match.group('tail'))
102	return tail, ranges
103
104	def send_range_content(self, file, start, length):
105	file.seek(start)
106	self.wfile.write(file.read(length))
107
108	def get_single_range(self, file, file_size, start, end):
109	self.send_response(206)
110	length = end - start + 1
111	self.send_header('Accept-Ranges', 'bytes')
112	self.send_header("Content-Length", "%d" % length)
113
114	self.send_header("Content-Type", 'application/octet-stream')
115	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
116	end,
117	file_size))
118	self.end_headers()
119	self.send_range_content(file, start, length)
120
121	def get_multiple_ranges(self, file, file_size, ranges):
122	self.send_response(206)
123	self.send_header('Accept-Ranges', 'bytes')
124	boundary = "%d" % random.randint(0,0x7FFFFFFF)
125	self.send_header("Content-Type",
126	"multipart/byteranges; boundary=%s" % boundary)
127	self.end_headers()
128	for (start, end) in ranges:
129	self.wfile.write("--%s\r\n" % boundary)
130	self.send_header("Content-type", 'application/octet-stream')
131	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
132	end,
133	file_size))
134	self.end_headers()
135	self.send_range_content(file, start, end - start + 1)
136	self.wfile.write("--%s\r\n" % boundary)
137	pass
138
139	def do_GET(self):
140	"""Serve a GET request.
141
142	Handles the Range header.
143	"""
144
145	path = self.translate_path(self.path)
146	ranges_header_value = self.headers.get('Range')
147	if ranges_header_value is None or os.path.isdir(path):
148	# Let the mother class handle most cases
149	return SimpleHTTPRequestHandler.do_GET(self)
150
151	try:
152	# Always read in binary mode. Opening files in text
153	# mode may cause newline translations, making the
154	# actual size of the content transmitted less than
155	# the content-length!
156	file = open(path, 'rb')
157	except IOError:
158	self.send_error(404, "File not found")
159	return None
160
161	file_size = os.fstat(file.fileno())[6]
162	tail, ranges = self.parse_ranges(ranges_header_value)
163	# Normalize tail into ranges
164	if tail != 0:
165	ranges.append((file_size - tail, file_size))
166
167	ranges_valid = True
168	if len(ranges) == 0:
169	ranges_valid = False
170	else:
171	for (start, end) in ranges:
172	if start >= file_size or end >= file_size:
173	ranges_valid = False
174	break
175	if not ranges_valid:
176	# RFC2616 14-16 says that invalid Range headers
177	# should be ignored and in that case, the whole file
178	# should be returned as if no Range header was
179	# present
180	file.close() # Will be reopened by the following call
181	return SimpleHTTPRequestHandler.do_GET(self)
182
183	if len(ranges) == 1:
184	(start, end) = ranges[0]
185	self.get_single_range(file, file_size, start, end)
186	else:
187	self.get_multiple_ranges(file, file_size, ranges)
188	file.close()
189
190	if sys.platform == 'win32':
191	# On win32 you cannot access non-ascii filenames without
192	# decoding them into unicode first.
193	# However, under Linux, you can access bytestream paths
194	# without any problems. If this function was always active
195	# it would probably break tests when LANG=C was set
196	def translate_path(self, path):
197	"""Translate a /-separated PATH to the local filename syntax.
198
199	For bzr, all url paths are considered to be utf8 paths.
200	On Linux, you can access these paths directly over the bytestream
201	request, but on win32, you must decode them, and access them
202	as Unicode files.
203	"""
204	# abandon query parameters
205	path = urlparse.urlparse(path)[2]
206	path = posixpath.normpath(urllib.unquote(path))
207	path = path.decode('utf-8')
208	words = path.split('/')
209	words = filter(None, words)
210	path = os.getcwdu()
211	for word in words:
212	drive, word = os.path.splitdrive(word)
213	head, word = os.path.split(word)
214	if word in (os.curdir, os.pardir): continue
215	path = os.path.join(path, word)
216	return path
217
218
219	class TestingHTTPServer(BaseHTTPServer.HTTPServer):
220	def __init__(self, server_address, RequestHandlerClass, test_case):
221	BaseHTTPServer.HTTPServer.__init__(self, server_address,
222	RequestHandlerClass)
223	self.test_case = test_case
224
225
226	class HttpServer(Server):
227	"""A test server for http transports.
228
229	Subclasses can provide a specific request handler.
230	"""
231
232	# used to form the url that connects to this server
233	_url_protocol = 'http'
234
235	# Subclasses can provide a specific request handler
236	def __init__(self, request_handler=TestingHTTPRequestHandler):
237	Server.__init__(self)
238	self.request_handler = request_handler
239
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	240	def _get_httpd(self):
	241	return TestingHTTPServer(('localhost', 0),
	242	self.request_handler,
	243	self)
	244
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	245	def _http_start(self):
	246	httpd = None
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	247	httpd = self._get_httpd()
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	248	host, port = httpd.socket.getsockname()
	249	self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
	250	self._http_starting.release()
	251	httpd.socket.settimeout(0.1)
	252
	253	while self._http_running:
	254	try:
	255	httpd.handle_request()
	256	except socket.timeout:
	257	pass
	258
	259	def _get_remote_url(self, path):
	260	path_parts = path.split(os.path.sep)
	261	if os.path.isabs(path):
	262	if path_parts[:len(self._local_path_parts)] != \
	263	self._local_path_parts:
	264	raise BadWebserverPath(path, self.test_dir)
	265	remote_path = '/'.join(path_parts[len(self._local_path_parts):])
	266	else:
	267	remote_path = '/'.join(path_parts)
	268
	269	self._http_starting.acquire()
	270	self._http_starting.release()
	271	return self._http_base_url + remote_path
	272
	273	def log(self, format, *args):
	274	"""Capture Server log output."""
	275	self.logs.append(format % args)
	276
	277	def setUp(self):
	278	"""See bzrlib.transport.Server.setUp."""
	279	self._home_dir = os.getcwdu()
	280	self._local_path_parts = self._home_dir.split(os.path.sep)
	281	self._http_starting = threading.Lock()
	282	self._http_starting.acquire()
	283	self._http_running = True
	284	self._http_base_url = None
	285	self._http_thread = threading.Thread(target=self._http_start)
	286	self._http_thread.setDaemon(True)
	287	self._http_thread.start()
	288	self._http_proxy = os.environ.get("http_proxy")
	289	if self._http_proxy is not None:
	290	del os.environ["http_proxy"]
	291	self.logs = []
	292
	293	def tearDown(self):
	294	"""See bzrlib.transport.Server.tearDown."""
	295	self._http_running = False
	296	self._http_thread.join()
	297	if self._http_proxy is not None:
	298	import os
	299	os.environ["http_proxy"] = self._http_proxy
	300
	301	def get_url(self):
	302	"""See bzrlib.transport.Server.get_url."""
	303	return self._get_remote_url(self._home_dir)
	304
	305	def get_bogus_url(self):
	306	"""See bzrlib.transport.Server.get_bogus_url."""
	307	# this is chosen to try to prevent trouble with proxies, weird dns,
	308	# etc
	309	return 'http://127.0.0.1:1/'
	310
	311
312	class HttpServer_urllib(HttpServer):
313	"""Subclass of HttpServer that gives http+urllib urls.
314
315	This is for use in testing: connections to this server will always go
316	through urllib where possible.
317	"""
318
319	# urls returned by this server should require the urllib client impl
320	_url_protocol = 'http+urllib'
321
322
323	class HttpServer_PyCurl(HttpServer):
324	"""Subclass of HttpServer that gives http+pycurl urls.
325
326	This is for use in testing: connections to this server will always go
327	through pycurl where possible.
328	"""
329
330	# We don't care about checking the pycurl availability as
331	# this server will be required only when pycurl is present
332
333	# urls returned by this server should require the pycurl client impl
334	_url_protocol = 'http+pycurl'