/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1540.3.3 by Martin Pool
Review updates of pycurl transport
1
# Copyright (C) 2005, 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from cStringIO import StringIO
1540.3.3 by Martin Pool
Review updates of pycurl transport
23
import errno
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
24
import mimetools
1540.3.6 by Martin Pool
[merge] update from bzr.dev
25
import os
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
26
import posixpath
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
27
import re
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
28
import sys
1540.3.3 by Martin Pool
Review updates of pycurl transport
29
import urlparse
30
import urllib
1530.1.11 by Robert Collins
Push the transport permutations list into each transport module allowing for automatic testing of new modules that are registered as transports.
31
from warnings import warn
1540.3.3 by Martin Pool
Review updates of pycurl transport
32
1786.1.6 by John Arbash Meinel
Missed a couple of imports
33
# TODO: load these only when running http tests
34
import BaseHTTPServer, SimpleHTTPServer, socket, time
35
import threading
36
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
37
from bzrlib import errors
1540.3.6 by Martin Pool
[merge] update from bzr.dev
38
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1685.1.8 by John Arbash Meinel
Re-allow a couple more tests, fix a bug in http, non_ascii tests still fail.
39
                           TransportError, ConnectionError, InvalidURL)
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
40
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
41
from bzrlib.trace import mutter
1786.1.6 by John Arbash Meinel
Missed a couple of imports
42
from bzrlib.transport import Transport, register_transport, Server
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
43
from bzrlib.transport.http.response import (HttpMultipartRangeResponse,
44
                                            HttpRangeResponse)
1540.2.1 by Röbey Pointer
change http url parsing to use urlparse, and use the ui_factory to ask for a password if necessary
45
from bzrlib.ui import ui_factory
1540.3.6 by Martin Pool
[merge] update from bzr.dev
46
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
47
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
48
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url and register them.

    A ``user[:password]@`` prefix in the network location is stripped from
    the url and handed to ``password_manager.add_password()``.  When the url
    names a user but no password, the password is requested through
    ``ui_factory.get_password()``.

    :param url: An absolute http or https url.  The scheme may carry an
        implementation qualifier, e.g. ``http+urllib``.
    :param password_manager: Object providing
        ``add_password(realm, host, username, password)``; the realm is
        always passed as None.
    :return: The url, minus the auth parameters (which confuse urllib2).
    :raises AssertionError: if url is not an absolute http(s) url.
    """
    if not re.match(r'^(https?)(\+\w+)?://', url):
        # Raised explicitly (instead of via an assert statement) so the check
        # is still performed when Python runs with -O.
        raise AssertionError('invalid absolute url %r' % url)
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # The password manager is keyed on the bare host, without the port.
        host = netloc.split(':', 1)[0]
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # The conversions need the trailing 's': '%(user)@' is not a
            # valid %-format specifier and fails when the prompt is expanded.
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
76
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
77
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
78
def _extract_headers(header_text, url):
79
    """Extract the mapping for an rfc2822 header
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
80
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
81
    This is a helper function for the test suite and for _pycurl.
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
82
    (urllib already parses the headers for us)
83
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
84
    In the case that there are multiple headers inside the file,
85
    the last one is returned.
86
87
    :param header_text: A string of header information.
88
        This expects that the first line of a header will always be HTTP ...
89
    :param url: The url we are parsing, so we can raise nice errors
90
    :return: mimetools.Message object, which basically acts like a case 
91
        insensitive dictionary.
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
92
    """
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
93
    first_header = True
94
    remaining = header_text
95
96
    if not remaining:
97
        raise errors.InvalidHttpResponse(url, 'Empty headers')
98
99
    while remaining:
100
        header_file = StringIO(remaining)
101
        first_line = header_file.readline()
102
        if not first_line.startswith('HTTP'):
103
            if first_header: # The first header *must* start with HTTP
104
                raise errors.InvalidHttpResponse(url,
105
                    'Opening header line did not start with HTTP: %s' 
106
                    % (first_line,))
107
                assert False, 'Opening header line was not HTTP'
108
            else:
109
                break # We are done parsing
110
        first_header = False
111
        m = mimetools.Message(header_file)
112
113
        # mimetools.Message parses the first header up to a blank line
114
        # So while there is remaining data, it probably means there is
115
        # another header to be parsed.
116
        # Get rid of any preceeding whitespace, which if it is all whitespace
117
        # will get rid of everything.
118
        remaining = header_file.read().lstrip()
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
119
    return m
120
121
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
122
class HttpTransportBase(Transport):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.

    Subclasses are expected to provide has() and _get(); every operation
    that would modify the remote side raises TransportNotPossible.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    def __init__(self, base, from_transport=None):
        """Set the base path where files will be stored.

        :param base: Absolute http(s) url, optionally with an implementation
            qualifier in the scheme (e.g. ``http+pycurl://host/``).  A
            trailing '/' is appended when missing.
        :param from_transport: Optional transport this one is cloned from.
            clone() always passes it as a hint for daughter classes; this
            base class accepts it (so clone() also works when __init__ is
            not overridden) but does not use it.
        :raises AssertionError: if base is not an http or https url.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            # Drop the leading '+' of the qualifier.
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        (apparent_proto, self._host,
            self._path, self._parameters,
            self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.

        :param relpath: A plain (non-unicode) string.  A leading '/' makes it
            relative to the host root instead of this transport's path.
        :raises AssertionError: if relpath is not a string.
        :raises InvalidURL: if relpath is a unicode string.
        :raises ValueError: if a non-absolute relpath ends with '/'.
        """
        if not isinstance(relpath, basestring):
            # Explicit raise so the check survives python -O; lists of path
            # segments are not supported (no magic interfaces).
            raise AssertionError('relpath must be a string: %r' % (relpath,))
        if isinstance(relpath, unicode):
            raise InvalidURL(relpath, 'paths must not be unicode.')
        relpath_parts = relpath.split('/')
        if relpath.startswith('/'):
            basepath = []
        else:
            # Except for the root, no trailing slashes are allowed
            if len(relpath_parts) > 1 and relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
            basepath = self._path.split('/')
            if len(basepath) > 0 and basepath[-1] == '':
                basepath = basepath[:-1]

        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        result = urlparse.urlunparse((self._qualified_proto,
                                    self._host, path, '', '', ''))
        return result

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed

        The implementation qualifier (e.g. "+pycurl") is removed from the
        scheme, and the result is always a plain str.
        """
        abspath = self.abspath(relpath)
        qp = self._qualified_proto
        rp = self._proto
        if qp != rp:
            abspath = rp + abspath[len(qp):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        """Abstract here; always raises NotImplementedError."""
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: The file-like object produced by _get() for the whole file.
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)] to fetch parts of a file.

        :returns: (http_code, result_file)

        Note that the current http implementations can only fetch one range at
        a time through this call.
        """
        raise NotImplementedError(self._get)

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param relpath: The relative path to the file
        :param offsets: A list of (offset, size) tuples.
        :return: A generator of (offset, data) tuples, in the order the
            offsets were given.
        :raises errors.ShortReadvError: if fewer than size bytes could be
            read for one of the offsets.
        """
        ranges = self.offsets_to_ranges(offsets)
        mutter('http readv of %s collapsed %s offsets => %s',
                relpath, len(offsets), ranges)
        code, f = self._get(relpath, ranges)
        for start, size in offsets:
            # A negative start is interpreted relative to the end of file.
            if start < 0:
                whence = 2
            else:
                whence = 0
            f.seek(start, whence)
            start = f.tell()
            data = f.read(size)
            if len(data) != size:
                raise errors.ShortReadvError(relpath, start, size,
                                             actual=len(data))
            yield start, data

    @staticmethod
    def offsets_to_ranges(offsets):
        """Turn a list of offsets and sizes into a list of byte ranges.

        :param offsets: A list of tuples of (start, size).  An empty list
            is not accepted.
        :return: a list of inclusive byte ranges [start, end], sorted by
            start.  Adjacent and overlapping ranges will be combined.
        """
        combined = []
        # Make sure we process sorted offsets
        for start, size in sorted(offsets):
            end = start + size - 1
            if combined and start <= combined[-1][1] + 1:
                # Touches or overlaps the previous range.  Only ever grow it:
                # an offset lying entirely inside the previous range must not
                # pull its end backwards.
                if end > combined[-1][1]:
                    combined[-1][1] = end
            else:
                combined.append([start, end])
        return combined

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        Not supported over http: always raises TransportNotPossible.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The target transport; must not be an http transport.
        :raises TransportNotPossible: if other is an HttpTransportBase.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.  Over http it always raises TransportNotPossible.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset

        We let the daughter classes take advantage of the hint
        that it's a cloning, not a raw creation: self is passed as the
        second constructor argument.
        """
        if offset is None:
            return self.__class__(self.base, self)
        else:
            return self.__class__(self.abspath(offset), self)

    @staticmethod
    def range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        :param ranges: A list of byte ranges, (start, end).
        :param tail_amount: If true (non-zero), a trailing '-N' item asking
            for the last N bytes is appended.

        :return: HTTP range header string, without the leading 'bytes='.
        """
        strings = []
        for start, end in ranges:
            strings.append('%d-%d' % (start, end))

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
400
401
1530.1.3 by Robert Collins
transport implementations now tested consistently.
402
#---------------- test server facilities ----------------
1540.3.6 by Martin Pool
[merge] update from bzr.dev
403
# TODO: load these only when running tests
1530.1.3 by Robert Collins
transport implementations now tested consistently.
404
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
405
1530.1.3 by Robert Collins
transport implementations now tested consistently.
406
class WebserverNotAvailable(Exception):
    """Plain Exception subclass; adds no behaviour of its own.

    NOTE(review): presumably raised when a test web server cannot be used;
    nothing in this part of the file raises it -- confirm against callers.
    """
408
409
410
class BadWebserverPath(ValueError):
    """A path that is not located under the expected directory.

    The exception must be constructed with exactly two arguments, the
    offending path and the directory it was compared against; both are
    interpolated into the message by __str__.
    """

    def __str__(self):
        """Return the message built from the two constructor arguments."""
        template = 'path %s is not in %s'
        return template % self.args
413
414
415
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
416
417
    def log_message(self, format, *args):
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
418
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
1530.1.3 by Robert Collins
transport implementations now tested consistently.
419
                                  self.address_string(),
420
                                  self.log_date_time_string(),
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
421
                                  format % args,
422
                                  self.headers.get('referer', '-'),
423
                                  self.headers.get('user-agent', '-'))
1530.1.3 by Robert Collins
transport implementations now tested consistently.
424
425
    def handle_one_request(self):
426
        """Handle a single HTTP request.
427
428
        You normally don't need to override this method; see the class
429
        __doc__ string for information on how to handle specific HTTP
430
        commands such as GET and POST.
431
432
        """
433
        for i in xrange(1,11): # Don't try more than 10 times
434
            try:
435
                self.raw_requestline = self.rfile.readline()
436
            except socket.error, e:
437
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
438
                    # omitted for now because some tests look at the log of
439
                    # the server and expect to see no errors.  see recent
440
                    # email thread. -- mbp 20051021. 
441
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
442
                    time.sleep(0.01)
443
                    continue
444
                raise
445
            else:
446
                break
447
        if not self.raw_requestline:
448
            self.close_connection = 1
449
            return
450
        if not self.parse_request(): # An error code has been sent, just exit
451
            return
452
        mname = 'do_' + self.command
1963.2.6 by Robey Pointer
pychecker is on crack; go back to using 'is None'.
453
        if getattr(self, mname, None) is None:
1530.1.3 by Robert Collins
transport implementations now tested consistently.
454
            self.send_error(501, "Unsupported method (%r)" % self.command)
455
            return
456
        method = getattr(self, mname)
457
        method()
458
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
459
    if sys.platform == 'win32':
460
        # On win32 you cannot access non-ascii filenames without
461
        # decoding them into unicode first.
462
        # However, under Linux, you can access bytestream paths
463
        # without any problems. If this function was always active
464
        # it would probably break tests when LANG=C was set
465
        def translate_path(self, path):
466
            """Translate a /-separated PATH to the local filename syntax.
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
467
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
468
            For bzr, all url paths are considered to be utf8 paths.
469
            On Linux, you can access these paths directly over the bytestream
470
            request, but on win32, you must decode them, and access them
471
            as Unicode files.
472
            """
473
            # abandon query parameters
474
            path = urlparse.urlparse(path)[2]
475
            path = posixpath.normpath(urllib.unquote(path))
476
            path = path.decode('utf-8')
477
            words = path.split('/')
478
            words = filter(None, words)
479
            path = os.getcwdu()
480
            for word in words:
481
                drive, word = os.path.splitdrive(word)
482
                head, word = os.path.split(word)
483
                if word in (os.curdir, os.pardir): continue
484
                path = os.path.join(path, word)
485
            return path
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
486
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
487
1530.1.3 by Robert Collins
transport implementations now tested consistently.
488
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
489
    def __init__(self, server_address, RequestHandlerClass, test_case):
490
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
491
                                                RequestHandlerClass)
492
        self.test_case = test_case
493
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
494
1530.1.3 by Robert Collins
transport implementations now tested consistently.
495
class HttpServer(Server):
    """A test server for http transports.

    setUp() starts a TestingHTTPServer in a daemon thread, listening on a
    free localhost port and serving the directory that was current when
    setUp() was called. tearDown() stops that thread. Lines logged by the
    request handler are collected in ``self.logs``.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object; nothing is started until setUp().

        :param request_handler: The request handler class given to the
            TestingHTTPServer created by setUp().
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _http_start(self):
        """Body of the server thread: serve requests until told to stop."""
        httpd = TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)
        try:
            port = httpd.socket.getsockname()[1]
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
            # The base url is known now: unblock _get_remote_url().
            self._http_starting.release()
            # A short timeout makes handle_request() return regularly so
            # that the _http_running flag is noticed.
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Do not leave the listening socket open once the thread ends.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url at which the local ``path`` is served.

        :param path: A local path. An absolute path must be located under
            the directory recorded by setUp(); a relative path is used as
            is, with the local separator replaced by '/'.
        :return: The base url of the server followed by the remote path.
        :raises BadWebserverPath: If ``path`` is absolute and not under the
            served directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # Report the directory the path was compared against.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published its base url.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Must exist before the server thread starts, since the request
        # handler logs through self.log().
        self.logs = []
        # Held until the server thread knows the port it listens on.
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remember and remove any http_proxy setting; tearDown() puts it
        # back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
573