/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2005-2010 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
22
from __future__ import absolute_import
23
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
24
import os
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
25
import re
2172.3.2 by v.ladeuil+lp at free
Fix the missing import and typos in comments.
26
import sys
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
27
import weakref
1786.1.6 by John Arbash Meinel
Missed a couple of imports
28
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
29
from ... import (
3675.1.1 by Martin Pool
Merge and update log+ transport decorator
30
    debug,
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
31
    errors,
5609.9.1 by Martin
Blindly change all users of get_transport to address the function via the transport module
32
    transport,
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
33
    ui,
34
    urlutils,
35
    )
6670.4.16 by Jelmer Vernooij
Move smart to breezy.bzr.
36
from ...bzr.smart import medium
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
37
from ...trace import mutter
38
from ...transport import (
2485.8.16 by Vincent Ladeuil
Create a new, empty, ConnectedTransport class.
39
    ConnectedTransport,
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
40
    )
1540.3.6 by Martin Pool
[merge] update from bzr.dev
41
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
42
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
43
class HttpTransportBase(ConnectedTransport):
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
44
    """Base class for http implementations.
45
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
46
    Does URL parsing, etc, but not any network IO.
47
48
    The protocol can be given as e.g. http+urllib://host/ to use a particular
49
    implementation.
50
    """
51
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
52
    # _unqualified_scheme: "http" or "https"
53
    # _scheme: may have "+pycurl", etc
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
54
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
55
    def __init__(self, base, _impl_name, _from_transport=None):
        """Set up the transport from its base URL.

        :param base: URL of the form ``http[s][+impl]://...``.
        :param _impl_name: Name of the http client implementation; stored
            unchanged on the instance.
        :param _from_transport: Optional transport this one derives from. It
            is forwarded to the parent constructor and, when given, supplies
            the initial range hint.
        :raises AssertionError: If ``base`` is not an http or https URL.
        """
        scheme_match = re.match(r'^(https?)(\+\w+)?://', base)
        if scheme_match is None:
            raise AssertionError("not a http url: %r" % base)
        # Only the bare "http"/"https" part, without any "+impl" qualifier.
        self._unqualified_scheme = scheme_match.group(1)
        self._impl_name = _impl_name
        super(HttpTransportBase, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # The range hint evolves during the life of the transport: we begin
        # with multi-range requests, fall back to single-range requests when
        # the server answers with bogus results, and finally give up on
        # ranges altogether if the server cannot cope. Whatever has been
        # learnt so far is inherited by clones.
        if _from_transport is None:
            self._range_hint = 'multi'
        else:
            self._range_hint = _from_transport._range_hint
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
76
77
    def has(self, relpath):
        """Existence check; abstract on this base class.

        :param relpath: Path relative to the transport base (unused here).
        :raises NotImplementedError: Always.
        """
        message = "has() is abstract on %r" % self
        raise NotImplementedError(message)
79
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
80
    def get(self, relpath):
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
81
        """Get the file at the given relative path.
82
83
        :param relpath: The relative path to the file
84
        """
3945.1.5 by Vincent Ladeuil
Start implementing http activity reporting at socket level.
85
        code, response_file = self._get(relpath, None)
6352.4.1 by Jelmer Vernooij
Don't allow seeking backwards in get.
86
        return response_file
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
87
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
88
    def _get(self, relpath, ranges, tail_amount=0):
1540.3.27 by Martin Pool
Integrate http range support for pycurl
89
        """Get a file, or part of a file.
90
91
        :param relpath: Path relative to transport base URL
2164.2.1 by v.ladeuil+lp at free
First rough http branch redirection implementation.
92
        :param ranges: None to get the whole file;
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
93
            or  a list of _CoalescedOffset to fetch parts of a file.
2164.2.26 by Vincent Ladeuil
Delete obsolete note in doc string.
94
        :param tail_amount: The amount to get from the end of the file.
1540.3.27 by Martin Pool
Integrate http range support for pycurl
95
96
        :returns: (http_code, result_file)
97
        """
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
98
        raise NotImplementedError(self._get)
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
99
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
100
    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        The returned URL never carries the user or the password, and always
        uses the unqualified scheme.

        :param relpath: Path relative to this transport's parsed URL.
        :returns: The string form of the resulting URL.
        """
        target = self._parsed_url.clone(relpath)
        # Blank the credentials, in both their plain and quoted forms.
        for credential in ('user', 'quoted_user',
                           'password', 'quoted_password'):
            setattr(target, credential, None)
        target.scheme = self._unqualified_scheme
        return str(target)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
110
111
    def _create_auth(self):
4795.4.4 by Vincent Ladeuil
Protect more access to 'user' and 'password' auth attributes.
112
        """Returns a dict containing the credentials provided at build time."""
6055.2.1 by Jelmer Vernooij
Add UnparsedUrl.
113
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
114
                    user=self._parsed_url.user, password=self._parsed_url.password,
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
115
                    protocol=self._unqualified_scheme,
6055.2.1 by Jelmer Vernooij
Add UnparsedUrl.
116
                    path=self._parsed_url.path)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
117
        return auth
118
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
119
    def get_smart_medium(self):
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
120
        """See Transport.get_smart_medium."""
121
        if self._medium is None:
122
            # Since medium holds some state (smart server probing at least), we
123
            # need to keep it around. Note that this is needed because medium
124
            # has the same 'base' attribute as the transport so it can't be
125
            # shared between transports having different bases.
126
            self._medium = SmartClientHTTPMedium(self)
127
        return self._medium
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
128
6621.9.1 by Martin
Avoid passing exc_info around in http range degrade logic
129
    def _degrade_range_hint(self, relpath, ranges):
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
130
        if self._range_hint == 'multi':
131
            self._range_hint = 'single'
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
132
            mutter('Retry "%s" with single range request' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
133
        elif self._range_hint == 'single':
134
            self._range_hint = None
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
135
            mutter('Retry "%s" without ranges' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
136
        else:
6621.9.1 by Martin
Avoid passing exc_info around in http range degrade logic
137
            # We tried all the tricks, but nothing worked, caller must reraise.
138
            return False
139
        return True
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
140
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
141
    # _coalesce_offsets is a helper for readv, it tries to combine ranges without
142
    # degrading readv performances. _bytes_to_read_before_seek is the value
143
    # used for the limit parameter and has been tuned for other transports. For
144
    # HTTP, the name is inappropriate but the parameter is still useful and
145
    # helps reduce the number of chunks in the response. The overhead for a
146
    # chunk (headers, length, footer around the data itself) is variable but
147
    # around 50 bytes. We use 128 to reduce the range specifiers that appear in
148
    # the header, some servers (notably Apache) enforce a maximum length for a
149
    # header and issue a '400: Bad request' error when too many ranges are
150
    # specified.
151
    _bytes_to_read_before_seek = 128
152
    # No limit on the number of offsets that get combined into one, we are trying
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
153
    # to avoid downloading the whole file.
3024.2.1 by Vincent Ladeuil
Fix 165061 by using the correct _max_readv_combine attribute.
154
    _max_readv_combine = 0
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
155
    # By default Apache has a limit of ~400 ranges before replying with a 400
156
    # Bad Request. So we go underneath that amount to be safe.
157
    _max_get_ranges = 200
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
158
    # We impose no limit on the range size. But see _pycurl.py for a different
159
    # use.
160
    _get_max_size = 0
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
161
2745.5.1 by Robert Collins
* New parameter on ``bzrlib.transport.Transport.readv``
162
    def _readv(self, relpath, offsets):
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
163
        """Get parts of the file at the given relative path.
164
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
165
        :param offsets: A list of (offset, size) tuples.
1540.3.27 by Martin Pool
Integrate http range support for pycurl
166
        :param return: A list or generator of (offset, data) tuples
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
167
        """
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
168
        # offsets may be a generator, we will iterate it several times, so
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
169
        # build a list
170
        offsets = list(offsets)
171
172
        try_again = True
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
173
        retried_offset = None
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
174
        while try_again:
175
            try_again = False
176
177
            # Coalesce the offsets to minimize the GET requests issued
178
            sorted_offsets = sorted(offsets)
179
            coalesced = self._coalesce_offsets(
180
                sorted_offsets, limit=self._max_readv_combine,
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
181
                fudge_factor=self._bytes_to_read_before_seek,
182
                max_size=self._get_max_size)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
183
184
            # Turn it into a list, we will iterate it several times
185
            coalesced = list(coalesced)
3675.1.1 by Martin Pool
Merge and update log+ transport decorator
186
            if 'http' in debug.debug_flags:
187
                mutter('http readv of %s  offsets => %s collapsed %s',
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
188
                    relpath, len(offsets), len(coalesced))
189
190
            # Cache the data read, but only until it's been used
191
            data_map = {}
192
            # We will iterate on the data received from the GET requests and
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
193
            # serve the corresponding offsets respecting the initial order. We
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
194
            # need an offset iterator for that.
195
            iter_offsets = iter(offsets)
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
196
            cur_offset_and_size = next(iter_offsets)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
197
198
            try:
3059.2.10 by Vincent Ladeuil
Jam's review feedback.
199
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
200
                    # Split the received chunk
201
                    for offset, size in cur_coal.ranges:
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
202
                        start = cur_coal.start + offset
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
203
                        rfile.seek(start, os.SEEK_SET)
3059.2.10 by Vincent Ladeuil
Jam's review feedback.
204
                        data = rfile.read(size)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
205
                        data_len = len(data)
206
                        if data_len != size:
207
                            raise errors.ShortReadvError(relpath, start, size,
208
                                                         actual=data_len)
3059.2.5 by Vincent Ladeuil
DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
209
                        if (start, size) == cur_offset_and_size:
210
                            # The offset requested are sorted as the coalesced
3059.2.11 by Vincent Ladeuil
Fix typos mentioned by spiv.
211
                            # ones, no need to cache. Win !
3059.2.5 by Vincent Ladeuil
DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
212
                            yield cur_offset_and_size[0], data
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
213
                            cur_offset_and_size = next(iter_offsets)
3059.2.5 by Vincent Ladeuil
DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
214
                        else:
215
                            # Different sorting. We need to cache.
216
                            data_map[(start, size)] = data
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
217
218
                    # Yield everything we can
219
                    while cur_offset_and_size in data_map:
220
                        # Clean the cached data since we use it
221
                        # XXX: will break if offsets contains duplicates --
222
                        # vila20071129
223
                        this_data = data_map.pop(cur_offset_and_size)
224
                        yield cur_offset_and_size[0], this_data
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
225
                        cur_offset_and_size = next(iter_offsets)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
226
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
227
            except (errors.ShortReadvError, errors.InvalidRange,
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
228
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
229
                mutter('Exception %r: %s during http._readv',e, e)
230
                if (not isinstance(e, errors.ShortReadvError)
231
                    or retried_offset == cur_offset_and_size):
232
                    # We don't degrade the range hint for ShortReadvError since
233
                    # they do not indicate a problem with the server ability to
234
                    # handle ranges. Except when we fail to get back a required
235
                    # offset twice in a row. In that case, falling back to
6621.9.1 by Martin
Avoid passing exc_info around in http range degrade logic
236
                    # single range or whole file should help.
237
                    if not self._degrade_range_hint(relpath, coalesced):
238
                        raise
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
239
                # Some offsets may have been already processed, so we retry
240
                # only the unsuccessful ones.
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
241
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
242
                retried_offset = cur_offset_and_size
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
243
                try_again = True
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
244
245
    def _coalesce_readv(self, relpath, coalesced):
246
        """Issue several GET requests to satisfy the coalesced offsets"""
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
247
248
        def get_and_yield(relpath, coalesced):
249
            if coalesced:
250
                # Note that the _get below may raise
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
251
                # errors.InvalidHttpRange. It's the caller's responsibility to
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
252
                # decide how to retry since it may provide different coalesced
253
                # offsets.
254
                code, rfile = self._get(relpath, coalesced)
255
                for coal in coalesced:
256
                    yield coal, rfile
257
258
        if self._range_hint is None:
259
            # Download whole file
260
            for c, rfile in get_and_yield(relpath, coalesced):
261
                yield c, rfile
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
262
        else:
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
263
            total = len(coalesced)
264
            if self._range_hint == 'multi':
265
                max_ranges = self._max_get_ranges
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
266
            elif self._range_hint == 'single':
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
267
                max_ranges = total
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
268
            else:
269
                raise AssertionError("Unknown _range_hint %r"
270
                                     % (self._range_hint,))
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
271
            # TODO: Some web servers may ignore the range requests and return
272
            # the whole file, we may want to detect that and avoid further
273
            # requests.
274
            # Hint: test_readv_multiple_get_requests will fail once we do that
275
            cumul = 0
276
            ranges = []
277
            for coal in coalesced:
278
                if ((self._get_max_size > 0
279
                     and cumul + coal.length > self._get_max_size)
280
                    or len(ranges) >= max_ranges):
281
                    # Get that much and yield
282
                    for c, rfile in get_and_yield(relpath, ranges):
283
                        yield c, rfile
284
                    # Restart with the current offset
285
                    ranges = [coal]
286
                    cumul = coal.length
287
                else:
288
                    ranges.append(coal)
289
                    cumul += coal.length
290
            # Get the rest and yield
291
            for c, rfile in get_and_yield(relpath, ranges):
292
                yield c, rfile
1786.1.5 by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object.
293
2671.3.1 by Robert Collins
* New method ``bzrlib.transport.Transport.get_recommended_page_size``.
294
    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        A large page size is suggested for HTTP so that the overhead
        introduced by latency is reduced.

        :returns: The suggested page size in bytes (64 KiB).
        """
        kibibyte = 1024
        return 64 * kibibyte
301
2018.2.10 by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call.
302
    def _post(self, body_bytes):
303
        """POST body_bytes to .bzr/smart on this transport.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
304
2018.2.10 by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call.
305
        :returns: (response code, response body file-like object).
306
        """
307
        # TODO: Requiring all the body_bytes to be available at the beginning of
308
        # the POST may require large client buffers.  It would be nice to have
309
        # an interface that allows streaming via POST when possible (and
310
        # degrades to a local buffer when not).
311
        raise NotImplementedError(self._post)
312
1955.3.6 by John Arbash Meinel
Lots of deprecation warnings, but no errors
313
    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location (unsupported).

        :param relpath: Location to put the contents, relative to base.
        :param f: File-like object.
        :param mode: Unused here.
        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http PUT not supported'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
320
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
321
    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path (unsupported).

        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http does not support mkdir()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
324
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
325
    def rmdir(self, relpath):
        """See Transport.rmdir (unsupported).

        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http does not support rmdir()'
        raise errors.TransportNotPossible(reason)
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
328
1955.3.15 by John Arbash Meinel
Deprecate 'Transport.append' in favor of Transport.append_file or Transport.append_bytes
329
    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final location
        (unsupported).

        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http does not support append()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
334
335
    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to (unsupported).

        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http does not support copy()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
338
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
339
    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The destination transport.
        :param mode: Passed through to the parent implementation.
        :param pb: Passed through to the parent implementation.
        :returns: Whatever the parent class ``copy_to`` returns.
        :raises errors.TransportNotPossible: If ``other`` is itself an
            HttpTransportBase.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if not isinstance(other, HttpTransportBase):
            parent = super(HttpTransportBase, self)
            return parent.copy_to(relpaths, other, mode=mode, pb=pb)
        raise errors.TransportNotPossible(
            'http cannot be the target of copy_to()')
907.1.28 by John Arbash Meinel
Added pb to function that were missing, implemented a basic double-dispatch copy_to function.
356
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
357
    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to (unsupported).

        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http does not support move()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
360
361
    def delete(self, relpath):
        """Refuse to delete the item at relpath.

        :param relpath: Relative path of the item that would be deleted.
        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support delete()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
364
2634.1.1 by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch.
365
    def external_url(self):
        """See breezy.transport.Transport.external_url.

        :return: The string form of this transport's URL with the scheme
            replaced by the unqualified one.
        """
        # An HTTP URL is usable by other programs provided it does not carry
        # the implementation qualifier, so work on a copy and swap the scheme
        # there, leaving self._parsed_url untouched.
        public_url = self._parsed_url.clone()
        public_url.scheme = self._unqualified_scheme
        return str(public_url)
2634.1.1 by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch.
372
1530.1.3 by Robert Collins
transport implementations now tested consistently.
373
    def is_readonly(self):
        """See Transport.is_readonly.

        :return: Always True.
        """
        return True
376
1400.1.1 by Robert Collins
implement a basic test for the ui branch command from http servers
377
    def listable(self):
        """See Transport.listable.

        :return: Always False.
        """
        return False
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
380
381
    def stat(self, relpath):
        """Refuse to return stat information for relpath.

        :param relpath: Relative path of the file that would be examined.
        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support stat()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
385
907.1.24 by John Arbash Meinel
Remote functionality work.
386
    def lock_read(self, relpath):
        """Pretend to lock the given file for shared (read) access.

        No locking is performed; a placeholder object is handed back so the
        caller has something to unlock.

        :param relpath: The path the caller wants to lock.
        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignored locks for reading; keep that tradition
        # and return a bogus lock object.
        class BogusLock(object):

            def __init__(self, path):
                # Remember which path this placeholder stands for.
                self.path = path

            def unlock(self):
                # Nothing was locked, so there is nothing to release.
                return None

        bogus = BogusLock(relpath)
        return bogus
398
399
    def lock_write(self, relpath):
        """Refuse to lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.

        :param relpath: The path the caller wants to lock.
        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support lock_write()'
        raise errors.TransportNotPossible(reason)
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
406
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
407
    def _attempted_range_header(self, offsets, tail_amount):
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
408
        """Prepare a HTTP Range header at a level the server should accept.
409
410
        :return: the range header representing offsets/tail_amount or None if
411
            no header can be built.
412
        """
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
413
414
        if self._range_hint == 'multi':
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
415
            # Generate the header describing all offsets
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
416
            return self._range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
417
        elif self._range_hint == 'single':
418
            # Combine all the requested ranges into a single
419
            # encompassing one
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
420
            if len(offsets) > 0:
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
421
                if tail_amount not in (0, None):
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
422
                    # Nothing we can do here to combine ranges with tail_amount
423
                    # in a single range, just returns None. The whole file
424
                    # should be downloaded.
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
425
                    return None
426
                else:
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
427
                    start = offsets[0].start
428
                    last = offsets[-1]
429
                    end = last.start + last.length - 1
430
                    whole = self._coalesce_offsets([(start, end - start + 1)],
431
                                                   limit=0, fudge_factor=0)
432
                    return self._range_header(list(whole), 0)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
433
            else:
434
                # Only tail_amount, requested, leave range_header
435
                # do its work
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
436
                return self._range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
437
        else:
438
            return None
439
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
440
    @staticmethod
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
441
    def _range_header(ranges, tail_amount):
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
442
        """Turn a list of bytes ranges into a HTTP Range header value.
443
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
444
        :param ranges: A list of _CoalescedOffset
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
445
        :param tail_amount: The amount to get from the end of the file.
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
446
447
        :return: HTTP range header string.
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
448
449
        At least a non-empty ranges *or* a tail_amount must be
450
        provided.
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
451
        """
452
        strings = []
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
453
        for offset in ranges:
454
            strings.append('%d-%d' % (offset.start,
455
                                      offset.start + offset.length - 1))
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
456
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
457
        if tail_amount:
458
            strings.append('-%d' % tail_amount)
459
1786.1.36 by John Arbash Meinel
pycurl expects us to just set the range of bytes, not including bytes=
460
        return ','.join(strings)
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
461
3878.4.5 by Vincent Ladeuil
Don't use the exception as a parameter for _redirected_to.
462
    def _redirected_to(self, source, target):
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
463
        """Returns a transport suitable to re-issue a redirected request.
464
3878.4.5 by Vincent Ladeuil
Don't use the exception as a parameter for _redirected_to.
465
        :param source: The source url as returned by the server.
466
        :param target: The target url as returned by the server.
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
467
468
        The redirection can be handled only if the relpath involved is not
469
        renamed by the redirection.
470
471
        :returns: A transport or None.
472
        """
6145.1.2 by Jelmer Vernooij
Some refactoring.
473
        parsed_source = self._split_url(source)
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
474
        parsed_target = self._split_url(target)
6145.1.2 by Jelmer Vernooij
Some refactoring.
475
        pl = len(self._parsed_url.path)
6145.1.4 by Jelmer Vernooij
Some more comments.
476
        # determine the excess tail - the relative path that was in
477
        # the original request but not part of this transports' URL.
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
478
        excess_tail = parsed_source.path[pl:].strip("/")
479
        if not target.endswith(excess_tail):
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
480
            # The final part of the url has been renamed, we can't handle the
481
            # redirection.
482
            return None
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
483
484
        target_path = parsed_target.path
485
        if excess_tail:
486
            # Drop the tail that was in the redirect but not part of
487
            # the path of this transport.
488
            target_path = target_path[:-len(excess_tail)]
489
6145.1.2 by Jelmer Vernooij
Some refactoring.
490
        if parsed_target.scheme in ('http', 'https'):
3878.4.7 by Vincent Ladeuil
Fixed as per Robert's review.
491
            # Same protocol family (i.e. http[s]), we will preserve the same
492
            # http client implementation when a redirection occurs from one to
493
            # the other (otherwise users may be surprised that bzr switches
494
            # from one implementation to the other, and devs may suffer
495
            # debugging it).
6145.1.2 by Jelmer Vernooij
Some refactoring.
496
            if (parsed_target.scheme == self._unqualified_scheme
497
                and parsed_target.host == self._parsed_url.host
498
                and parsed_target.port == self._parsed_url.port
499
                and (parsed_target.user is None or
500
                     parsed_target.user == self._parsed_url.user)):
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
501
                # If a user is specified, it should match, we don't care about
502
                # passwords, wrong passwords will be rejected anyway.
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
503
                return self.clone(target_path)
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
504
            else:
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
505
                # Rebuild the url preserving the scheme qualification and the
506
                # credentials (if they don't apply, the redirected to server
507
                # will tell us, but if they do apply, we avoid prompting the
508
                # user)
6670.2.2 by Jelmer Vernooij
Update news.
509
                redir_scheme = parsed_target.scheme
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
510
                new_url = self._unsplit_url(redir_scheme,
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
511
                    self._parsed_url.user,
512
                    self._parsed_url.password,
513
                    parsed_target.host, parsed_target.port,
514
                    target_path)
515
                return transport.get_transport_from_url(new_url)
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
516
        else:
517
            # Redirected to a different protocol
6145.1.3 by Jelmer Vernooij
Fix redirecting to other transports.
518
            new_url = self._unsplit_url(parsed_target.scheme,
519
                    parsed_target.user,
520
                    parsed_target.password,
521
                    parsed_target.host, parsed_target.port,
522
                    target_path)
523
            return transport.get_transport_from_url(new_url)
3878.4.1 by Vincent Ladeuil
Fix bug #245964 by preserving decorators during redirections (when
524
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
525
526
# TODO: May be better located in smart/medium.py with the other
527
# SmartMedium classes
528
class SmartClientHTTPMedium(medium.SmartClientMedium):
    """Smart client medium that sends its requests through an HTTP transport.

    The medium only keeps a weak reference to the transport it was built
    from, and delegates posting requests and disconnecting to it.
    """

    def __init__(self, http_transport):
        """Create a medium on top of http_transport.

        :param http_transport: The HTTP transport used to post requests; its
            ``base`` is handed to the parent constructor.
        """
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keeps only a weak reference to
        # its transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        """Report that this medium should be probed; always True."""
        return True

    def remote_path_from_transport(self, transport):
        """Return the unquoted path of transport relative to this medium.

        :param transport: A transport whose ``base`` may carry an optional
            'bzr+' prefix.
        :return: The unquoted url of transport relative to ``self.base``.
        """
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        transport_base = transport.base
        if transport_base.startswith('bzr+'):
            transport_base = transport_base[4:]
        rel_url = urlutils.relative_url(self.base, transport_base)
        return urlutils.unquote(rel_url)

    def send_http_smart_request(self, bytes):
        """Post a smart request through the transport and return the reply.

        :param bytes: The encoded request to post.
        :return: The file-like response body returned by the transport.
        :raises errors.SmartProtocolError: if the response code is not 200 or
            the connection is reset.
        """
        try:
            # Get back the http_transport held by the weak reference.
            # NOTE(review): assumes the transport is still alive while
            # requests are being sent, as the constructor comment states.
            t = self._http_transport_ref()
            code, body_filelike = t._post(bytes)
            if code != 200:
                raise errors.InvalidHttpResponse(
                    t._remote_path('.bzr/smart'),
                    'Expected 200 response code, got %r' % (code,))
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        pass

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        t = self._http_transport_ref()
        # A dead weak reference yields None: the transport is already gone,
        # so there is nothing left to disconnect.
        if t is not None:
            t.disconnect()
578
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
579
580
# TODO: May be better located in smart/medium.py with the other
581
# SmartMediumRequest classes
2018.5.2 by Andrew Bennetts
Start splitting bzrlib/transport/smart.py into a package.
582
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    The request is buffered in memory and posted in one go when writing is
    finished; the response is then read from the returned file-like body.
    """

    def __init__(self, client_medium):
        """Create a request that will be sent through client_medium."""
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Use a bytes literal: it is the same object type as '' on Python 2,
        # and on Python 3 it lets bytes payloads be appended without a
        # TypeError.
        self._buffer = b''

    def _accept_bytes(self, bytes):
        """Append bytes to the pending request buffer."""
        self._buffer += bytes

    def _finished_writing(self):
        """Post the buffered request and keep the response body for reading."""
        data = self._medium.send_http_smart_request(self._buffer)
        self._response_body = data

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: if medium._get_line hands back excess bytes,
            which this request has nowhere to store.
        """
        line, excess = medium._get_line(self._response_body.read)
        # Compare with a bytes literal so an empty bytes excess is recognised
        # as "no excess" on Python 3 too (b'' != '' is True there).
        if excess != b'':
            raise AssertionError(
                '_get_line returned excess bytes, but this mediumrequest '
                'cannot handle excess. (%r)' % (excess,))
        return line

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        pass
4912.2.1 by Martin Pool
Add unhtml_roughly
611
612
4912.2.4 by Martin Pool
Add test for unhtml_roughly, and truncate at 1000 bytes
613
def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    Every tag, newline and ``&nbsp;`` entity is replaced by a single space,
    then the result is cut to length_limit.

    :param maybe_html: Text that may contain html markup.
    :param length_limit: Truncate the result to this many characters.
    :return: The roughly de-tagged, truncated text.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    """
    text, _replaced = re.subn(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)
    return text[:length_limit]