/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2004.1.40 by v.ladeuil+lp at free
Fix the race condition again and correct some small typos to be in
1
# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
2
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
import BaseHTTPServer
18
import errno
19
import os
20
from SimpleHTTPServer import SimpleHTTPRequestHandler
21
import socket
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
22
import posixpath
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
23
import random
24
import re
25
import sys
26
import threading
27
import time
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
28
import urllib
29
import urlparse
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
30
31
from bzrlib.transport import Server
32
33
34
class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be used."""
36
37
38
class BadWebserverPath(ValueError):
    """Raised when a local path is not located under the served directory.

    Expected to be built as ``BadWebserverPath(path, served_dir)``.
    """

    def __str__(self):
        """Describe the offending path and the directory it should be in."""
        if len(self.args) != 2:
            # The message template needs exactly two values; with any other
            # argument count fall back to the default rendering instead of
            # failing with a TypeError while formatting the error itself.
            return ValueError.__str__(self)
        return 'path %s is not in %s' % self.args
41
42
43
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test HTTP server.

    On top of SimpleHTTPRequestHandler it:

    - routes access-log lines to ``self.server.test_case.log``;
    - retries request-line reads that fail with EAGAIN/EWOULDBLOCK;
    - serves ``Range`` requests (one or several byte ranges) in do_GET.
    """

    # Number of attempts made to read the request line when the socket
    # keeps reporting EAGAIN/EWOULDBLOCK.
    _max_read_attempts = 10

    # Pause, in seconds, between two read attempts.
    _read_retry_delay = 0.01

    def log_message(self, format, *args):
        """Forward an access-log entry to the owning test server.

        :param format: %-style format string for the message.
        :param args: values interpolated into ``format``.
        """
        # The base class can report an error (and therefore log) before the
        # request headers were parsed; do not fail on the missing attribute.
        headers = getattr(self, 'headers', None)
        if headers is None:
            referer = user_agent = '-'
        else:
            referer = headers.get('referer', '-')
            user_agent = headers.get('user-agent', '-')
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  referer,
                                  user_agent)

    def handle_one_request(self):
        """Handle a single HTTP request.

        Reads the request line (retrying on EAGAIN/EWOULDBLOCK), parses the
        request and dispatches to the matching ``do_<COMMAND>`` method. A
        501 error is sent when no such method exists.
        """
        for attempt in range(self._max_read_attempts):
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # sys.exc_info() keeps this parsable by every Python version.
                err = sys.exc_info()[1]
                if err.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # The retry is deliberately not logged: some tests look
                    # at the log of the server and expect to see no errors.
                    # -- mbp 20051021
                    time.sleep(self._read_retry_delay)
                    continue
                raise
            else:
                break
        else:
            # Every attempt hit EAGAIN: there is no fresh request line, and
            # self.raw_requestline is either missing or left over from the
            # previous request. Give up on this connection.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        method = getattr(self, 'do_' + self.command, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: the raw value of the Range header.
        :return: a ``(tail, ranges)`` tuple where ``tail`` is the length of
            the requested suffix (0 if none) and ``ranges`` is a list of
            ``(start, end)`` integer pairs.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is None:
                # Syntactically invalid range
                return 0, []
            tail = int(tail_match.group('tail'))
        return tail, ranges

    def _check_ranges(self, ranges, file_size):
        """Validate ``ranges`` against a file of ``file_size`` bytes.

        :param ranges: list of ``(start, end)`` pairs, both inclusive.
        :param file_size: size in bytes of the file being served.
        :return: the ranges with ``end`` truncated to ``file_size - 1``, or
            None as soon as one range cannot be satisfied.
        """
        checked = []
        for start, end in ranges:
            # RFC2616 14.35: a range is invalid if start > end, and cannot
            # be satisfied if it starts at or after the end of the file
            # (byte positions are zero-based).
            if start > end or start >= file_size:
                return None
            # RFC2616 14.35, end values should be truncated
            # to file_size - 1 if they exceed it
            checked.append((start, min(end, file_size - 1)))
        return checked

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response made of the single range ``start``-``end``.

        :param file: file object opened in binary mode.
        :param file_size: total size of the file, used in Content-Range.
        :param start: offset of the first byte to send.
        :param end: offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range.

        :param file: file object opened in binary mode.
        :param file_size: total size of the file, used in Content-Range.
        :param ranges: list of ``(start, end)`` pairs, both inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        delimiter = "--%s" % boundary
        first_part = True
        for (start, end) in ranges:
            if not first_part:
                # The CRLF preceding a delimiter belongs to the delimiter,
                # not to the content of the previous part.
                self.wfile.write("\r\n")
            first_part = False
            # Exactly one delimiter line opens each part.
            self.wfile.write(delimiter + "\r\n")
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Closing delimiter: the boundary followed by two more hyphens.
        self.wfile.write("\r\n" + delimiter + "--\r\n")

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header, or
        targeting a directory, are delegated to SimpleHTTPRequestHandler.
        A missing file yields a 404; invalid or unsatisfiable ranges yield
        a 416.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            content = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        try:
            file_size = os.fstat(content.fileno()).st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # A suffix longer than the file selects the whole file, so
                # never let the start offset go negative.
                ranges.append((max(0, file_size - tail), file_size))

            checked_ranges = self._check_ranges(ranges, file_size)
            if not checked_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(checked_ranges) == 1:
                (start, end) = checked_ranges[0]
                self.get_single_range(content, file_size, start, end)
            else:
                self.get_multiple_ranges(content, file_size, checked_ranges)
        finally:
            # Release the file even if sending the response failed.
            content.close()

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last path component of each word, dropping
                # any drive or directory part.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
245
246
247
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the object it is serving for.

    The ``test_case`` attribute gives request handlers access to that
    object through ``self.server.test_case``.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server.

        :param server_address: ``(host, port)`` tuple to bind to.
        :param RequestHandlerClass: class instantiated for each request.
        :param test_case: object stored as ``self.test_case``.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        self.test_case = test_case
252
253
254
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The HTTP server runs in a daemon thread started by setUp() and stopped
    by tearDown(). It serves the directory that was current when setUp()
    was called.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object; nothing is started until setUp().

        :param request_handler: request handler class given to the
            underlying TestingHTTPServer.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Build the HTTP server, bound to a free port on localhost."""
        return TestingHTTPServer(('localhost', 0),
                                 self.request_handler,
                                 self)

    def _http_start(self):
        """Body of the server thread: serve requests until told to stop."""
        httpd = None
        try:
            try:
                httpd = self._get_httpd()
                host, port = httpd.socket.getsockname()
                self._http_base_url = '%s://localhost:%s/' % (
                    self._url_protocol, port)
            finally:
                # Always release the lock, even if the server could not be
                # created; otherwise _get_remote_url() would block forever.
                self._http_starting.release()
            # Use a short timeout so that the loop notices promptly when
            # tearDown() clears _http_running.
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            if httpd is not None:
                # Do not leave the listening socket open once we stop.
                httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which this server exposes ``path``.

        :param path: a local path, either relative or located under the
            directory being served.
        :raises BadWebserverPath: if an absolute ``path`` is not under the
            served directory.
        :raises WebserverNotAvailable: if the server failed to start.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_length = len(self._local_path_parts)
            if path_parts[:prefix_length] != self._local_path_parts:
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_length:])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published (or failed to publish)
        # its base url.
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('the test http server did not start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Everything the server thread may touch is set up before the
        # thread is started.
        self.logs = []
        # Remove http_proxy from the environment for as long as the server
        # is up; tearDown() restores it.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]
        # Held until the server thread has computed its base url.
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
338
339
340
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
349
350
351
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'