/brz/remove-bazaar : contents of bzrlib/tests/HttpServer.py at revision 2004.1.25

: (revision 2004.1.25)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2005 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import random
import re
import sys
import threading
import time

from bzrlib.transport import Server


class WebserverNotAvailable(Exception):
    """Exception type for reporting that no web server is available.

    Nothing visible in this module raises it.
    """


class BadWebserverPath(ValueError):
    """A path that does not live under the directory served over http.

    Expected to be built with exactly two arguments, ``(path, root)``,
    which are interpolated into the message by ``__str__``.
    """

    def __str__(self):
        """Return 'path <path> is not in <root>' built from ``self.args``."""
        template = 'path %s is not in %s'
        return template % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):

    def log_message(self, format, *args):
        """Forward an access-log style line to the owning test server.

        The line is handed to ``self.server.test_case.log`` instead of
        being written to stderr.

        :param format: %-style format string for the message.
        :param args: values interpolated into ``format``.
        """
        # The base class may log (through send_error) before the request
        # headers have been parsed, e.g. for a malformed request line; in
        # that case there is no ``headers`` attribute yet, so fall back to
        # the '-' placeholders instead of failing with AttributeError.
        headers = getattr(self, 'headers', None)
        if headers is None:
            referer = user_agent = '-'
        else:
            referer = headers.get('referer', '-')
            user_agent = headers.get('user-agent', '-')
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  referer,
                                  user_agent)

    def handle_one_request(self):
        """Handle a single HTTP request, retrying the first read on EAGAIN.

        The request line is read from ``self.rfile``; a socket error whose
        code is EAGAIN or EWOULDBLOCK is retried after a 10ms sleep, for at
        most 10 attempts, while any other socket error is re-raised.  The
        request is then parsed and dispatched to the ``do_<COMMAND>``
        method; a 501 error is sent when no such method exists.

        """
        for i in xrange(1,11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error, e:
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # omitted for now because some tests look at the log of
                    # the server and expect to see no errors.  see recent
                    # email thread. -- mbp 20051021. 
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
                    time.sleep(0.01)
                    continue
                raise
            else:
                # The read succeeded, stop retrying
                break
        # NOTE(review): when all 10 attempts hit EAGAIN the loop simply
        # ends, and raw_requestline has not been assigned by this call --
        # confirm whether that case needs explicit handling.
        if not self.raw_requestline:
            # Empty read: ask the caller to close the connection
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if getattr(self, mname, None) is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse a Range header value into a tail length and a range list.

        :param ranges_header: the raw header value, e.g.
            ``'bytes=0-9,20-29'`` or ``'bytes=-100'``.
        :return: a ``(tail, ranges)`` tuple.  ``ranges`` is a list of
            ``(start, end)`` integer pairs (both inclusive) and ``tail`` is
            the length of the last ``-N`` suffix range seen, or 0 if none
            was given.  A header that does not use the ``bytes`` unit
            yields ``(0, [])``, which callers can treat like any other
            header without usable ranges.
        """
        prefix = 'bytes='
        tail = 0
        ranges = []
        if not ranges_header.startswith(prefix):
            # Not an assert: this is data sent by the client, and the
            # check must also survive running under ``python -O``.
            return tail, ranges
        for range_str in ranges_header[len(prefix):].split(','):
            # Clients may put whitespace after the commas
            range_str = range_str.strip()
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is not None:
                tail = int(tail_match.group('tail'))
            # Anything else (e.g. open-ended 'N-') is ignored
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Copy ``length`` bytes of ``file``, from offset ``start``, to the client.

        The data is fetched with a single ``read`` call and written to
        ``self.wfile``.
        """
        file.seek(start)
        emit = self.wfile.write
        emit(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Answer with a 206 response carrying one byte range.

        :param file: open file object to read the content from.
        :param file_size: total size, reported in Content-Range.
        :param start: offset of the first byte sent.
        :param end: offset of the last byte sent (inclusive).
        """
        self.send_response(206)
        # Both bounds are inclusive
        length = end - start + 1
        response_headers = (
            ('Accept-Ranges', 'bytes'),
            ("Content-Length", "%d" % length),
            ("Content-Type", 'application/octet-stream'),
            ("Content-Range", "bytes %d-%d/%d" % (start, end, file_size)),
            )
        for header_name, header_value in response_headers:
            self.send_header(header_name, header_value)
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Answer with a 206 multipart/byteranges response.

        :param file: open file object to read the content from.
        :param file_size: total size, reported in each Content-Range.
        :param ranges: iterable of ``(start, end)`` pairs, both inclusive.

        Each part is written as a delimiter line, its own headers, the
        bytes of the range and then the delimiter line again.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        content_type = "multipart/byteranges; boundary=%s" % boundary
        self.send_header("Content-Type", content_type)
        self.end_headers()
        # NOTE(review): the same '--boundary' line is emitted before and
        # after every part and no '--boundary--' closing line is sent;
        # kept as is, confirm what the clients under test expect.
        delimiter = "--%s\r\n" % boundary
        for (first, last) in ranges:
            self.wfile.write(delimiter)
            self.send_header("Content-type", 'application/octet-stream')
            content_range = "bytes %d-%d/%d" % (first, last, file_size)
            self.send_header("Content-Range", content_range)
            self.end_headers()
            self.send_range_content(file, first, last - first + 1)
            self.wfile.write(delimiter)

    def do_GET(self):
        """Serve a GET request, honouring the Range header when possible.

        Requests without a Range header, requests for directories and
        requests whose ranges cannot be used are delegated to
        ``SimpleHTTPRequestHandler.do_GET``, which sends the whole
        content.  Otherwise a 206 response is sent with either a single
        range or a multipart body.  A 404 error is sent when the file
        cannot be opened.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        # try/finally so the file is closed even when sending fails
        try:
            file_size = os.fstat(file.fileno()).st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges: the last 'tail' bytes end at
            # offset file_size - 1 (bounds are inclusive), and a tail
            # longer than the file simply starts at offset 0.
            if tail != 0:
                ranges.append((max(0, file_size - tail), file_size - 1))

            ranges_valid = len(ranges) > 0
            for (start, end) in ranges:
                if start > end or start >= file_size or end >= file_size:
                    ranges_valid = False
                    break

            if ranges_valid:
                if len(ranges) == 1:
                    (start, end) = ranges[0]
                    self.get_single_range(file, file_size, start, end)
                else:
                    self.get_multiple_ranges(file, file_size, ranges)
                return None
        finally:
            file.close()

        # RFC2616 14-16 says that invalid Range headers
        # should be ignored and in that case, the whole file
        # should be returned as if no Range header was
        # present. The file has been closed above and is
        # reopened by the following call.
        return SimpleHTTPRequestHandler.do_GET(self)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: the path part of the request url, possibly
                followed by query parameters.
            :return: a unicode path below the current working directory.
            """
            # These modules are only needed here and are not imported at
            # module level; without them this method fails with NameError.
            import posixpath
            import urllib
            import urlparse

            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = filter(None, path.split('/'))
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word so that a
                # drive letter or directory part cannot be injected
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that keeps a reference to the object driving the test.

    Request handlers reach that object through ``self.server.test_case``.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server, then remember ``test_case``.

        :param server_address: passed unchanged to HTTPServer.
        :param RequestHandlerClass: passed unchanged to HTTPServer.
        :param test_case: stored as ``self.test_case``.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case


class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Initialise the base Server and remember the handler class.

        :param request_handler: stored as ``self.request_handler``;
            _http_start hands it to the TestingHTTPServer it creates.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _http_start(self):
        """Run the http server until ``self._http_running`` becomes false.

        Used as the target of the thread started by setUp.  The server
        listens on a port chosen by the system; once the base url is
        known, ``self._http_starting`` is released so that
        _get_remote_url can proceed.
        """
        httpd = TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)
        try:
            port = httpd.socket.getsockname()[1]
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
            self._http_starting.release()
            # Use a short timeout so that the loop below notices promptly
            # when _http_running has been cleared.
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Release the listening socket explicitly rather than
            # relying on garbage collection.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which the server exposes ``path``.

        :param path: a local path, either relative or an absolute path
            below the directory recorded by setUp.
        :return: the server base url followed by the /-separated path.
        :raises BadWebserverPath: if ``path`` is absolute but not below
            the directory recorded by setUp.

        Blocks until the server thread has published its base url.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_len = len(self._local_path_parts)
            if path_parts[:prefix_len] != self._local_path_parts:
                # Report the directory the prefix was compared against;
                # this class defines no 'test_dir' attribute.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_len:])
        else:
            remote_path = '/'.join(path_parts)

        # Wait for _http_start to release the lock, which means that
        # _http_base_url has been set.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Record one formatted server log line in ``self.logs``.

        :param format: %-style format string.
        :param args: values interpolated into ``format``.
        """
        record = self.logs.append
        record(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp.

        Records the current directory (used by _get_remote_url to make
        paths relative), starts _http_start in a daemon thread, removes
        ``http_proxy`` from the environment (the old value is kept for
        tearDown) and resets the captured logs.
        """
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Acquired here and released by _http_start once the base url is
        # known; _get_remote_url waits on it.
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        # Cleared by tearDown to end the serving loop
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remember the proxy setting before removing it from the environment
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]
        # Lines appended by log()
        self.logs = []

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown.

        Asks the serving loop to stop, waits for the server thread to
        finish and puts back the ``http_proxy`` value saved by setUp, if
        there was one.
        """
        self._http_running = False
        self._http_thread.join()
        saved_proxy = self._http_proxy
        if saved_proxy is None:
            return
        os.environ["http_proxy"] = saved_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url.

        Returns the url matching the directory recorded by setUp.
        """
        resolve = self._get_remote_url
        return resolve(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url.

        Always returns the same fixed url.
        """
        # A numeric address on port 1 is chosen to try to prevent trouble
        # with proxies, weird dns, etc
        bogus_url = 'http://127.0.0.1:1/'
        return bogus_url


class HttpServer_urllib(HttpServer):
    """HttpServer variant whose urls use the 'http+urllib' scheme.

    Meant for tests: connections to this server will always go through
    urllib where possible.
    """

    # Scheme used when building urls, so that they require the urllib
    # client implementation
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """HttpServer variant whose urls use the 'http+pycurl' scheme.

    Meant for tests: connections to this server will always go through
    pycurl where possible.
    """

    # The availability of pycurl is deliberately not checked here: this
    # server will be required only when pycurl is present

    # Scheme used when building urls, so that they require the pycurl
    # client implementation
    _url_protocol = 'http+pycurl'

2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	1	# Copyright (C) 2005 by Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	import BaseHTTPServer
	18	import errno
	19	import os
	20	from SimpleHTTPServer import SimpleHTTPRequestHandler
	21	import socket
	22	import random
	23	import re
	24	import sys
	25	import threading
	26	import time
	27
	28	from bzrlib.transport import Server
	29
	30
	31	class WebserverNotAvailable(Exception):
	32	pass
	33
	34
	35	class BadWebserverPath(ValueError):
	36	def __str__(self):
	37	return 'path %s is not in %s' % self.args
	38
	39
	40	class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
	41
	42	def log_message(self, format, *args):
	43	self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
	44	self.address_string(),
	45	self.log_date_time_string(),
	46	format % args,
	47	self.headers.get('referer', '-'),
	48	self.headers.get('user-agent', '-'))
	49
	50	def handle_one_request(self):
	51	"""Handle a single HTTP request.
	52
	53	You normally don't need to override this method; see the class
	54	__doc__ string for information on how to handle specific HTTP
	55	commands such as GET and POST.
	56
	57	"""
	58	for i in xrange(1,11): # Don't try more than 10 times
	59	try:
	60	self.raw_requestline = self.rfile.readline()
	61	except socket.error, e:
	62	if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
	63	# omitted for now because some tests look at the log of
	64	# the server and expect to see no errors. see recent
65	# email thread. -- mbp 20051021.
66	## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
67	time.sleep(0.01)
68	continue
69	raise
70	else:
71	break
72	if not self.raw_requestline:
73	self.close_connection = 1
74	return
75	if not self.parse_request(): # An error code has been sent, just exit
76	return
77	mname = 'do_' + self.command
78	if getattr(self, mname, None) is None:
79	self.send_error(501, "Unsupported method (%r)" % self.command)
80	return
81	method = getattr(self, mname)
82	method()
83
84	_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
85	_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
86
87	def parse_ranges(self, ranges_header):
88	"""Parse the range header value and returns ranges and tail"""
89	tail = 0
90	ranges = []
91	assert ranges_header.startswith('bytes=')
92	ranges_header = ranges_header[len('bytes='):]
93	for range_str in ranges_header.split(','):
94	range_match = self._range_regexp.match(range_str)
95	if range_match is not None:
96	ranges.append((int(range_match.group('start')),
97	int(range_match.group('end'))))
98	else:
99	tail_match = self._tail_regexp.match(range_str)
100	if tail_match is not None:
101	tail = int(tail_match.group('tail'))
102	return tail, ranges
103
104	def send_range_content(self, file, start, length):
105	file.seek(start)
106	self.wfile.write(file.read(length))
107
108	def get_single_range(self, file, file_size, start, end):
109	self.send_response(206)
110	length = end - start + 1
111	self.send_header('Accept-Ranges', 'bytes')
112	self.send_header("Content-Length", "%d" % length)
113
114	self.send_header("Content-Type", 'application/octet-stream')
115	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
116	end,
117	file_size))
118	self.end_headers()
119	self.send_range_content(file, start, length)
120
121	def get_multiple_ranges(self, file, file_size, ranges):
122	self.send_response(206)
123	self.send_header('Accept-Ranges', 'bytes')
124	boundary = "%d" % random.randint(0,0x7FFFFFFF)
125	self.send_header("Content-Type",
126	"multipart/byteranges; boundary=%s" % boundary)
127	self.end_headers()
128	for (start, end) in ranges:
129	self.wfile.write("--%s\r\n" % boundary)
130	self.send_header("Content-type", 'application/octet-stream')
131	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
132	end,
133	file_size))
134	self.end_headers()
135	self.send_range_content(file, start, end - start + 1)
136	self.wfile.write("--%s\r\n" % boundary)
137	pass
138
139	def do_GET(self):
140	"""Serve a GET request.
141
142	Handles the Range header.
143	"""
144
145	path = self.translate_path(self.path)
146	ranges_header_value = self.headers.get('Range')
147	if ranges_header_value is None or os.path.isdir(path):
148	# Let the mother class handle most cases
149	return SimpleHTTPRequestHandler.do_GET(self)
150
151	try:
152	# Always read in binary mode. Opening files in text
153	# mode may cause newline translations, making the
154	# actual size of the content transmitted less than
155	# the content-length!
156	file = open(path, 'rb')
157	except IOError:
158	self.send_error(404, "File not found")
159	return None
160
161	file_size = os.fstat(file.fileno())[6]
162	tail, ranges = self.parse_ranges(ranges_header_value)
163	# Normalize tail into ranges
164	if tail != 0:
165	ranges.append((file_size - tail, file_size))
166
167	ranges_valid = True
168	if len(ranges) == 0:
169	ranges_valid = False
170	else:
171	for (start, end) in ranges:
172	if start >= file_size or end >= file_size:
173	ranges_valid = False
174	break
175	if not ranges_valid:
176	# RFC2616 14-16 says that invalid Range headers
177	# should be ignored and in that case, the whole file
178	# should be returned as if no Range header was
179	# present
180	file.close() # Will be reopened by the following call
181	return SimpleHTTPRequestHandler.do_GET(self)
182
183	if len(ranges) == 1:
184	(start, end) = ranges[0]
185	self.get_single_range(file, file_size, start, end)
186	else:
187	self.get_multiple_ranges(file, file_size, ranges)
188	file.close()
189
190	if sys.platform == 'win32':
191	# On win32 you cannot access non-ascii filenames without
192	# decoding them into unicode first.
193	# However, under Linux, you can access bytestream paths
194	# without any problems. If this function was always active
195	# it would probably break tests when LANG=C was set
196	def translate_path(self, path):
197	"""Translate a /-separated PATH to the local filename syntax.
198
199	For bzr, all url paths are considered to be utf8 paths.
200	On Linux, you can access these paths directly over the bytestream
201	request, but on win32, you must decode them, and access them
202	as Unicode files.
203	"""
204	# abandon query parameters
205	path = urlparse.urlparse(path)[2]
206	path = posixpath.normpath(urllib.unquote(path))
207	path = path.decode('utf-8')
208	words = path.split('/')
209	words = filter(None, words)
210	path = os.getcwdu()
211	for word in words:
212	drive, word = os.path.splitdrive(word)
213	head, word = os.path.split(word)
214	if word in (os.curdir, os.pardir): continue
215	path = os.path.join(path, word)
216	return path
217
218
219	class TestingHTTPServer(BaseHTTPServer.HTTPServer):
220	def __init__(self, server_address, RequestHandlerClass, test_case):
221	BaseHTTPServer.HTTPServer.__init__(self, server_address,
222	RequestHandlerClass)
223	self.test_case = test_case
224
225
226	class HttpServer(Server):
227	"""A test server for http transports.
228
229	Subclasses can provide a specific request handler.
230	"""
231
232	# used to form the url that connects to this server
233	_url_protocol = 'http'
234
235	# Subclasses can provide a specific request handler
236	def __init__(self, request_handler=TestingHTTPRequestHandler):
237	Server.__init__(self)
238	self.request_handler = request_handler
239
240	def _http_start(self):
241	httpd = None
242	httpd = TestingHTTPServer(('localhost', 0),
243	self.request_handler,
244	self)
245	host, port = httpd.socket.getsockname()
246	self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
247	self._http_starting.release()
248	httpd.socket.settimeout(0.1)
249
250	while self._http_running:
251	try:
252	httpd.handle_request()
253	except socket.timeout:
254	pass
255
256	def _get_remote_url(self, path):
257	path_parts = path.split(os.path.sep)
258	if os.path.isabs(path):
259	if path_parts[:len(self._local_path_parts)] != \
260	self._local_path_parts:
261	raise BadWebserverPath(path, self.test_dir)
262	remote_path = '/'.join(path_parts[len(self._local_path_parts):])
263	else:
264	remote_path = '/'.join(path_parts)
265
266	self._http_starting.acquire()
267	self._http_starting.release()
268	return self._http_base_url + remote_path
269
270	def log(self, format, *args):
271	"""Capture Server log output."""
272	self.logs.append(format % args)
273
274	def setUp(self):
275	"""See bzrlib.transport.Server.setUp."""
276	self._home_dir = os.getcwdu()
277	self._local_path_parts = self._home_dir.split(os.path.sep)
278	self._http_starting = threading.Lock()
279	self._http_starting.acquire()
280	self._http_running = True
281	self._http_base_url = None
282	self._http_thread = threading.Thread(target=self._http_start)
283	self._http_thread.setDaemon(True)
284	self._http_thread.start()
285	self._http_proxy = os.environ.get("http_proxy")
286	if self._http_proxy is not None:
287	del os.environ["http_proxy"]
288	self.logs = []
289
290	def tearDown(self):
291	"""See bzrlib.transport.Server.tearDown."""
292	self._http_running = False
293	self._http_thread.join()
294	if self._http_proxy is not None:
295	import os
296	os.environ["http_proxy"] = self._http_proxy
297
298	def get_url(self):
299	"""See bzrlib.transport.Server.get_url."""
300	return self._get_remote_url(self._home_dir)
301
302	def get_bogus_url(self):
303	"""See bzrlib.transport.Server.get_bogus_url."""
304	# this is chosen to try to prevent trouble with proxies, weird dns,
305	# etc
306	return 'http://127.0.0.1:1/'
307
308
309	class HttpServer_urllib(HttpServer):
310	"""Subclass of HttpServer that gives http+urllib urls.
311
312	This is for use in testing: connections to this server will always go
313	through urllib where possible.
314	"""
315
316	# urls returned by this server should require the urllib client impl
317	_url_protocol = 'http+urllib'
318
319
320	class HttpServer_PyCurl(HttpServer):
321	"""Subclass of HttpServer that gives http+pycurl urls.
322
323	This is for use in testing: connections to this server will always go
324	through pycurl where possible.
325	"""
326
327	# We don't care about checking the pycurl availability as
328	# this server will be required only when pycurl is present
329
330	# urls returned by this server should require the pycurl client impl
331	_url_protocol = 'http+pycurl'