bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
1 |
# Copyright (C) 2005, 2006 Canonical Ltd
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
2 |
#
|
|
1185.11.19
by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings. |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
7 |
#
|
|
1185.11.19
by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
12 |
#
|
|
1185.11.19
by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
16 |
|
17 |
"""Base implementation of Transport over http.
|
|
18 |
||
19 |
There are separate implementation modules for each http client implementation.
|
|
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
20 |
"""
|
21 |
||
|
1711.4.14
by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded |
22 |
from cStringIO import StringIO |
|
1786.1.25
by John Arbash Meinel
Test that we can extract headers properly. |
23 |
import mimetools |
|
1540.3.23
by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl |
24 |
import re |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
25 |
import urlparse |
26 |
import urllib |
|
|
2172.3.2
by v.ladeuil+lp at free
Fix the missing import and typos in comments. |
27 |
import sys |
|
1786.1.6
by John Arbash Meinel
Missed a couple of imports |
28 |
|
|
2485.8.24
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
29 |
from bzrlib import ( |
30 |
errors, |
|
31 |
ui, |
|
32 |
urlutils, |
|
33 |
)
|
|
|
2400.1.3
by Andrew Bennetts
Split smart transport code into several separate modules. |
34 |
from bzrlib.smart import medium |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
35 |
from bzrlib.symbol_versioning import ( |
36 |
deprecated_method, |
|
37 |
zero_seventeen, |
|
38 |
)
|
|
|
1185.11.1
by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet. |
39 |
from bzrlib.trace import mutter |
|
2018.2.2
by Andrew Bennetts
Implement HTTP smart server. |
40 |
from bzrlib.transport import ( |
|
2485.8.16
by Vincent Ladeuil
Create a new, empty, ConnectedTransport class. |
41 |
ConnectedTransport, |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
42 |
_CoalescedOffset, |
|
2018.2.2
by Andrew Bennetts
Implement HTTP smart server. |
43 |
Transport, |
44 |
)
|
|
|
1540.3.6
by Martin Pool
[merge] update from bzr.dev |
45 |
|
|
2004.1.9
by vila
Takes jam's remarks into account when possible, add TODOs for the rest. |
46 |
# TODO: This is not used anymore by HttpTransport_urllib
|
47 |
# (extracting the auth info and prompting the user for a password
|
|
48 |
# have been split), only the tests still use it. It should be
|
|
49 |
# deleted and the tests rewritten ASAP to stay in sync.
|
|
|
1185.40.20
by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files |
50 |
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url.

    The credentials found in the url are added to the given password manager
    and the url is returned minus those auth parameters (which confuse
    urllib2).

    :param url: An absolute http or https url, optionally qualified with an
        implementation name (e.g. http+urllib://user:pass@host/path).
    :param password_manager: Object whose add_password(realm, host, user,
        password) method is called when the url carries credentials.
    :return: The url rebuilt without the user[:password]@ part.
    :raises AssertionError: If url is not an absolute http(s) url.
    """
    # Raise explicitly instead of using an assert statement so the check is
    # still performed when python runs with -O.
    if not re.match(r'^(https?)(\+\w+)?://', url):
        raise AssertionError('invalid absolute url %r' % url)
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        # Split on the *last* '@': the host[:port] part never contains one,
        # while an unquoted user name or password may.
        auth, netloc = netloc.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # No password in the url, ask the user for one.
            password = ui.ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
|
1553.1.5
by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to |
79 |
|
|
1185.50.83
by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it. |
80 |
|
|
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
81 |
def _extract_headers(header_text, url):
    """Parse rfc2822 style headers out of a raw HTTP header text.

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    When the text holds several consecutive header sections, the last one
    is the one returned.

    :param header_text: A string of header information.
        The first line of every header section is expected to start
        with HTTP.
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    :raises errors.InvalidHttpResponse: If header_text is empty or if its
        very first line does not start with HTTP.
    """
    if not header_text:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    message = None
    pending = header_text
    while pending:
        stream = StringIO(pending)
        status_line = stream.readline()
        if not status_line.startswith('HTTP'):
            if message is not None:
                # A section was already parsed, what is left is not a header.
                break
            # The first header *must* start with HTTP
            raise errors.InvalidHttpResponse(url,
                'Opening header line did not start with HTTP: %s'
                % (status_line,))
        # mimetools.Message consumes the section up to a blank line.
        message = mimetools.Message(stream)
        # Whatever is left is probably another header section. Dropping the
        # leading whitespace empties the string when only whitespace remains,
        # which ends the loop.
        pending = stream.read().lstrip()
    return message
122 |
||
123 |
||
|
2485.8.16
by Vincent Ladeuil
Create a new, empty, ConnectedTransport class. |
124 |
class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium): |
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
125 |
"""Base class for http implementations. |
126 |
||
|
1540.3.23
by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl |
127 |
Does URL parsing, etc, but not any network IO.
|
128 |
||
129 |
The protocol can be given as e.g. http+urllib://host/ to use a particular
|
|
130 |
implementation.
|
|
131 |
"""
|
|
132 |
||
|
2485.8.24
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
133 |
# _unqualified_scheme: "http" or "https"
|
134 |
# _scheme: may have "+pycurl", etc
|
|
|
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
135 |
|
|
2485.8.59
by Vincent Ladeuil
Update from review comments. |
136 |
def __init__(self, base, _from_transport=None):
    """Set the base path where files will be stored.

    :param base: An http or https url; the scheme may be qualified with an
        implementation name (e.g. http+pycurl://host/).
    :param _from_transport: Optional transport this one is derived from; its
        range hint is reused when given.
    :raises AssertionError: If base is not an http(s) url.
    """
    matched = re.match(r'^(https?)(\+\w+)?://', base)
    if not matched:
        raise AssertionError("not a http url: %r" % base)
    self._unqualified_scheme = matched.group(1)
    qualifier = matched.group(2)
    if qualifier:
        # Drop the leading '+' of the implementation qualifier.
        qualifier = qualifier[1:]
    self._impl_name = qualifier
    super(HttpTransportBase, self).__init__(base,
                                            _from_transport=_from_transport)
    # The range hint evolves during the life of the transport object: multi
    # range requests are tried first, then single range requests if the
    # server returns bogus results and finally no range at all if the server
    # really can't understand. Clones inherit whatever was learnt so far.
    if _from_transport is None:
        self._range_hint = 'multi'
    else:
        self._range_hint = _from_transport._range_hint
|
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
159 |
|
|
2485.8.25
by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different. |
160 |
def _remote_path(self, relpath):
    """Produce absolute path, adjusting protocol.

    :param relpath: Path relative to the transport base; it is unescaped
        and utf-8 encoded before being combined with the transport path.
    :return: The url built by _unsplit_url from the unqualified scheme, the
        connection parameters of this transport and the combined path.
    """
    utf8_relpath = urlutils.unescape(relpath).encode('utf-8')
    full_path = self._combine_paths(self._path, utf8_relpath)
    return self._unsplit_url(
        self._unqualified_scheme,
        self._user, self._password,
        self._host, self._port,
        full_path)
|
1540.3.25
by Martin Pool
New 'http+urllib' scheme |
168 |
|
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
169 |
def has(self, relpath):
    """Abstract here: always raises NotImplementedError.

    :param relpath: Unused by this base implementation.
    """
    message = "has() is abstract on %r" % self
    raise NotImplementedError(message)
171 |
||
|
2164.2.15
by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints |
172 |
def get(self, relpath):
    """Get the file at the given relative path.

    The whole file is requested (no ranges) and the http code returned by
    _get is discarded.

    :param relpath: The relative path to the file
    :return: The file-like object returned by _get.
    """
    unused_code, body_file = self._get(relpath, None)
    return body_file
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
179 |
|
|
2164.2.15
by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints |
180 |
def _get(self, relpath, ranges, tail_amount=0): |
|
1540.3.27
by Martin Pool
Integrate http range support for pycurl |
181 |
"""Get a file, or part of a file. |
182 |
||
183 |
:param relpath: Path relative to transport base URL
|
|
|
2164.2.1
by v.ladeuil+lp at free
First rough http branch redirection implementation. |
184 |
:param ranges: None to get the whole file;
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
185 |
or a list of _CoalescedOffset to fetch parts of a file.
|
|
2164.2.26
by Vincent Ladeuil
Delete obsolete note in doc string. |
186 |
:param tail_amount: The amount to get from the end of the file.
|
|
1540.3.27
by Martin Pool
Integrate http range support for pycurl |
187 |
|
188 |
:returns: (http_code, result_file)
|
|
189 |
"""
|
|
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
190 |
raise NotImplementedError(self._get) |
|
1594.2.5
by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support. |
191 |
|
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
192 |
def get_request(self):
    """Return a new SmartClientHTTPMediumRequest bound to this transport."""
    request = SmartClientHTTPMediumRequest(self)
    return request
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
194 |
|
|
2018.2.3
by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol. |
195 |
def get_smart_medium(self):
    """See Transport.get_smart_medium.

    HttpTransportBase derives from medium.SmartClientMedium, so the
    transport itself is the medium and is returned as is.
    """
    return self
|
2018.2.3
by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol. |
202 |
|
|
2520.2.2
by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request |
203 |
def _degrade_range_hint(self, relpath, ranges, exc_info): |
|
2000.3.9
by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :) |
204 |
if self._range_hint == 'multi': |
205 |
self._range_hint = 'single' |
|
|
2520.2.2
by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request |
206 |
mutter('Retry "%s" with single range request' % relpath) |
|
2000.3.9
by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :) |
207 |
elif self._range_hint == 'single': |
208 |
self._range_hint = None |
|
|
2520.2.2
by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request |
209 |
mutter('Retry "%s" without ranges' % relpath) |
|
2000.3.9
by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :) |
210 |
else: |
|
2520.2.2
by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request |
211 |
# We tried all the tricks, but nothing worked. We re-raise original
|
212 |
# exception; the 'mutter' calls above will indicate that further
|
|
213 |
# tries were unsuccessful
|
|
|
2172.3.1
by v.ladeuil+lp at free
Merge a recent bzr.dev (2172) and takes John's remarks into account. |
214 |
raise exc_info[0], exc_info[1], exc_info[2] |
|
2000.3.9
by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :) |
215 |
|
|
2520.2.2
by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request |
216 |
def _get_ranges_hinted(self, relpath, ranges):
    """Issue a ranged GET request taking server capabilities into account.

    The request is retried each time the server answers with an InvalidRange
    error, degrading the range hint before every new try, in order to
    minimize the data transferred.

    :param relpath: Path relative to transport base URL
    :param ranges: None to get the whole file;
        or a list of _CoalescedOffset to fetch parts of a file.
    :returns: A file handle containing at least the requested ranges.
    """
    first_failure = None
    while True:
        try:
            code, response_file = self._get(relpath, ranges)
        except errors.InvalidRange:
            # Remember the first failure only: it is the one re-raised by
            # _degrade_range_hint once all the fallbacks are exhausted.
            if first_failure is None:
                first_failure = sys.exc_info()
            self._degrade_range_hint(relpath, ranges, first_failure)
        else:
            return response_file
|
239 |
||
240 |
# _coalesce_offsets is a helper for readv, it try to combine ranges without
|
|
241 |
# degrading readv performances. _bytes_to_read_before_seek is the value
|
|
242 |
# used for the limit parameter and has been tuned for other transports. For
|
|
243 |
# HTTP, the name is inappropriate but the parameter is still useful and
|
|
244 |
# helps reduce the number of chunks in the response. The overhead for a
|
|
245 |
# chunk (headers, length, footer around the data itself is variable but
|
|
246 |
# around 50 bytes. We use 128 to reduce the range specifiers that appear in
|
|
247 |
# the header, some servers (notably Apache) enforce a maximum length for a
|
|
248 |
# header and issue a '400: Bad request' error when too much ranges are
|
|
249 |
# specified.
|
|
250 |
_bytes_to_read_before_seek = 128 |
|
251 |
# No limit on the offset number that get combined into one, we are trying
|
|
252 |
# to avoid downloading the whole file.
|
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
253 |
_max_readv_combined = 0 |
254 |
||
|
2745.5.1
by Robert Collins
* New parameter on ``bzrlib.transport.Transport.readv`` |
255 |
def _readv(self, relpath, offsets):
    """Get parts of the file at the given relative path.

    The offsets are coalesced before being requested, but the data is
    yielded in the order of the offsets given by the caller.

    :param relpath: Path relative to transport base URL
    :param offsets: An iterable of (offset, size) tuples.
    :return: A generator of (offset, data) tuples
    :raises errors.ShortReadvError: If a read is still short once all the
        range fallbacks have been tried (re-raised by _degrade_range_hint).
    """
    # The offsets are walked several times (sort, len, final loop), make
    # sure a one-shot iterable is not exhausted by the first pass.
    offsets = list(offsets)
    sorted_offsets = sorted(offsets)
    fudge = self._bytes_to_read_before_seek
    # NOTE(review): the combine limit is looked up as _max_readv_combine;
    # confirm the class defines it under exactly that name.
    coalesced = self._coalesce_offsets(sorted_offsets,
                                       limit=self._max_readv_combine,
                                       fudge_factor=fudge)
    coalesced = list(coalesced)
    mutter('http readv of %s offsets => %s collapsed %s',
           relpath, len(offsets), len(coalesced))

    f = self._get_ranges_hinted(relpath, coalesced)
    for start, size in offsets:
        try_again = True
        while try_again:
            try_again = False
            # A negative offset is relative to the end of the file.
            if start < 0:
                whence = 2
            else:
                whence = 0
            f.seek(start, whence)
            start = f.tell()
            try:
                data = f.read(size)
                if len(data) != size:
                    raise errors.ShortReadvError(relpath, start, size,
                                                 actual=len(data))
            except errors.ShortReadvError:
                self._degrade_range_hint(relpath, coalesced, sys.exc_info())

                # Since the offsets and the ranges may not be in the same
                # order, we don't try to calculate a restricted single
                # range encompassing unprocessed offsets.

                # Note: we replace 'f' here, it may need cleaning one day
                # before being thrown that way.
                f = self._get_ranges_hinted(relpath, coalesced)
                try_again = True

        # After one or more tries, we get the data.
        yield start, data
296 |
||
|
2671.3.1
by Robert Collins
* New method ``bzrlib.transport.Transport.get_recommended_page_size``. |
297 |
def recommended_page_size(self):
    """See Transport.recommended_page_size().

    A large page size (64 KiB) is suggested for HTTP to reduce the overhead
    introduced by latency.
    """
    kib = 1024
    return 64 * kib
|
304 |
||
|
1786.1.23
by John Arbash Meinel
Move offset_to_http_ranges back onto HttpTransportBase, clarify tests. |
305 |
@staticmethod
@deprecated_method(zero_seventeen)
def offsets_to_ranges(offsets):
    """Turn a list of offsets and sizes into a list of byte ranges.

    :param offsets: A list of tuples of (start, size). An empty list
        produces an empty result.
    :return: a list of inclusive byte ranges [start, end]
        Adjacent and overlapping ranges will be combined.
    """
    combined = []
    # Process the offsets in sorted order so that only the last combined
    # range needs to be looked at.
    for start, size in sorted(offsets):
        end = start + size - 1
        if combined and start <= combined[-1][1] + 1:
            # Touches or overlaps the previous range: extend it, but never
            # shrink it when the new range lies entirely inside.
            if end > combined[-1][1]:
                combined[-1][1] = end
        else:
            combined.append([start, end])

    return combined
|
1786.1.24
by John Arbash Meinel
Move the functions/regexes to be static members |
332 |
|
|
2018.2.10
by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call. |
333 |
def _post(self, body_bytes): |
334 |
"""POST body_bytes to .bzr/smart on this transport. |
|
335 |
|
|
336 |
:returns: (response code, response body file-like object).
|
|
337 |
"""
|
|
338 |
# TODO: Requiring all the body_bytes to be available at the beginning of
|
|
339 |
# the POST may require large client buffers. It would be nice to have
|
|
340 |
# an interface that allows streaming via POST when possible (and
|
|
341 |
# degrades to a local buffer when not).
|
|
342 |
raise NotImplementedError(self._post) |
|
343 |
||
|
1955.3.6
by John Arbash Meinel
Lots of deprecation warnings, but no errors |
344 |
def put_file(self, relpath, f, mode=None):
    """Copy the file-like object into the location.

    Not possible over http: always raises errors.TransportNotPossible.

    :param relpath: Location to put the contents, relative to base.
    :param f: File-like object.
    :param mode: Unused.
    """
    reason = 'http PUT not supported'
    raise errors.TransportNotPossible(reason)
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
351 |
|
|
1185.58.2
by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work. |
352 |
def mkdir(self, relpath, mode=None):
    """Create a directory at the given path.

    Not possible over http: always raises errors.TransportNotPossible.
    """
    reason = 'http does not support mkdir()'
    raise errors.TransportNotPossible(reason)
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
355 |
|
|
1534.4.15
by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports. |
356 |
def rmdir(self, relpath):
    """See Transport.rmdir.

    Not possible over http: always raises errors.TransportNotPossible.
    """
    reason = 'http does not support rmdir()'
    raise errors.TransportNotPossible(reason)
|
1534.4.15
by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports. |
359 |
|
|
1955.3.15
by John Arbash Meinel
Deprecate 'Transport.append' in favor of Transport.append_file or Transport.append_bytes |
360 |
def append_file(self, relpath, f, mode=None):
    """Append the text in the file-like object into the final location.

    Not possible over http: always raises errors.TransportNotPossible.
    """
    reason = 'http does not support append()'
    raise errors.TransportNotPossible(reason)
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
365 |
|
366 |
def copy(self, rel_from, rel_to):
    """Copy the item at rel_from to the location at rel_to

    Not possible over http: always raises errors.TransportNotPossible.
    """
    reason = 'http does not support copy()'
    raise errors.TransportNotPossible(reason)
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
369 |
|
|
1185.58.2
by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work. |
370 |
def copy_to(self, relpaths, other, mode=None, pb=None):
    """Copy a set of entries from self into another Transport.

    :param relpaths: A list/generator of entries to be copied.
    :param other: The target transport; an http transport is refused with
        errors.TransportNotPossible.
    :param mode: Passed through to the inherited copy_to.
    :param pb: Passed through to the inherited copy_to.

    TODO: if other is LocalTransport, is it possible to
          do better than put(get())?
    """
    # At this point HttpTransport might be able to check and see if
    # the remote location is the same, and rather than download, and
    # then upload, it could just issue a remote copy_this command.
    if isinstance(other, HttpTransportBase):
        raise errors.TransportNotPossible(
            'http cannot be the target of copy_to()')
    inherited = super(HttpTransportBase, self)
    return inherited.copy_to(relpaths, other, mode=mode, pb=pb)
|
|
907.1.28
by John Arbash Meinel
Added pb to function that were missing, implemented a basic double-dispatch copy_to function. |
387 |
|
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
388 |
def move(self, rel_from, rel_to): |
389 |
"""Move the item at rel_from to the location at rel_to""" |
|
|
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
390 |
raise errors.TransportNotPossible('http does not support move()') |
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
391 |
|
392 |
def delete(self, relpath): |
|
393 |
"""Delete the item at relpath""" |
|
|
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
394 |
raise errors.TransportNotPossible('http does not support delete()') |
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
395 |
|
|
2634.1.1
by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch. |
396 |
def external_url(self): |
397 |
"""See bzrlib.transport.Transport.external_url.""" |
|
398 |
# HTTP URLs are externally usable.
|
|
399 |
return self.base |
|
400 |
||
|
1530.1.3
by Robert Collins
transport implementations now tested consistently. |
401 |
def is_readonly(self): |
402 |
"""See Transport.is_readonly.""" |
|
403 |
return True |
|
404 |
||
|
1400.1.1
by Robert Collins
implement a basic test for the ui branch command from http servers |
405 |
def listable(self): |
406 |
"""See Transport.listable.""" |
|
407 |
return False |
|
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
408 |
|
409 |
def stat(self, relpath): |
|
410 |
"""Return the stat information for a file. |
|
411 |
"""
|
|
|
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
412 |
raise errors.TransportNotPossible('http does not support stat()') |
|
907.1.21
by John Arbash Meinel
Adding http transport as a valid transport protocol. |
413 |
|
|
907.1.24
by John Arbash Meinel
Remote functionality work. |
414 |
def lock_read(self, relpath): |
415 |
"""Lock the given file for shared (read) access. |
|
416 |
:return: A lock object, which should be passed to Transport.unlock()
|
|
417 |
"""
|
|
418 |
# The old RemoteBranch ignored locks for reading, so we will
|
|
419 |
# continue that tradition and return a bogus lock object.
|
|
420 |
class BogusLock(object): |
|
421 |
def __init__(self, path): |
|
422 |
self.path = path |
|
423 |
def unlock(self): |
|
424 |
pass
|
|
425 |
return BogusLock(relpath) |
|
426 |
||
427 |
def lock_write(self, relpath): |
|
428 |
"""Lock the given file for exclusive (write) access. |
|
429 |
WARNING: many transports do not support this, so try to avoid using it
|
|
430 |
||
431 |
:return: A lock object, which should be passed to Transport.unlock()
|
|
432 |
"""
|
|
|
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
433 |
raise errors.TransportNotPossible('http does not support lock_write()') |
|
1530.1.1
by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter. |
434 |
|
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
435 |
def clone(self, offset=None): |
436 |
"""Return a new HttpTransportBase with root at self.base + offset |
|
|
2025.2.1
by v.ladeuil+lp at free
Fix bug #61606 by providing cloning hint do daughter classes. |
437 |
|
|
2004.1.6
by vila
Connection sharing between cloned transports. |
438 |
We let the daughter classes take advantage of the hint
|
439 |
that this is a clone, not a raw creation.
|
|
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
440 |
"""
|
441 |
if offset is None: |
|
|
2004.1.6
by vila
Connection sharing between cloned transports. |
442 |
return self.__class__(self.base, self) |
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
443 |
else: |
|
2004.1.6
by vila
Connection sharing between cloned transports. |
444 |
return self.__class__(self.abspath(offset), self) |
|
1530.1.1
by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter. |
445 |
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
446 |
def _attempted_range_header(self, offsets, tail_amount): |
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
447 |
"""Prepare a HTTP Range header at a level the server should accept""" |
448 |
||
449 |
if self._range_hint == 'multi': |
|
450 |
# Nothing to do here
|
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
451 |
return self._range_header(offsets, tail_amount) |
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
452 |
elif self._range_hint == 'single': |
453 |
# Combine all the requested ranges into a single
|
|
454 |
# encompassing one
|
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
455 |
if len(offsets) > 0: |
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
456 |
if tail_amount not in (0, None): |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
457 |
# Nothing we can do here to combine ranges with tail_amount
|
458 |
# in a single range, just return None. The whole file
|
|
459 |
# should be downloaded.
|
|
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
460 |
return None |
461 |
else: |
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
462 |
start = offsets[0].start |
463 |
last = offsets[-1] |
|
464 |
end = last.start + last.length - 1 |
|
465 |
whole = self._coalesce_offsets([(start, end - start + 1)], |
|
466 |
limit=0, fudge_factor=0) |
|
467 |
return self._range_header(list(whole), 0) |
|
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
468 |
else: |
469 |
# Only tail_amount requested, leave range_header
|
|
470 |
# do its work
|
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
471 |
return self._range_header(offsets, tail_amount) |
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
472 |
else: |
473 |
return None |
|
474 |
||
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
475 |
@staticmethod
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
476 |
def _range_header(ranges, tail_amount): |
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
477 |
"""Turn a list of bytes ranges into a HTTP Range header value. |
478 |
||
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
479 |
:param ranges: A list of _CoalescedOffset
|
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
480 |
:param tail_amount: The amount to get from the end of the file.
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
481 |
|
482 |
:return: HTTP range header string.
|
|
|
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
483 |
|
484 |
At least one of a non-empty ranges list *or* a tail_amount must be
|
|
485 |
provided.
|
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
486 |
"""
|
487 |
strings = [] |
|
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
488 |
for offset in ranges: |
489 |
strings.append('%d-%d' % (offset.start, |
|
490 |
offset.start + offset.length - 1)) |
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
491 |
|
|
1786.1.8
by John Arbash Meinel
[merge] Johan Rydberg test updates |
492 |
if tail_amount: |
493 |
strings.append('-%d' % tail_amount) |
|
494 |
||
|
1786.1.36
by John Arbash Meinel
pycurl expects us to just set the range of bytes, not including bytes= |
495 |
return ','.join(strings) |
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
496 |
|
|
2018.2.8
by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again. |
497 |
def send_http_smart_request(self, bytes): |
498 |
code, body_filelike = self._post(bytes) |
|
499 |
assert code == 200, 'unexpected HTTP response code %r' % (code,) |
|
500 |
return body_filelike |
|
501 |
||
502 |
||
|
2018.5.2
by Andrew Bennetts
Start splitting bzrlib/transport/smart.py into a package. |
503 |
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest): |
|
2018.2.8
by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again. |
504 |
"""A SmartClientMediumRequest that works with an HTTP medium.""" |
505 |
||
|
2018.5.2
by Andrew Bennetts
Start splitting bzrlib/transport/smart.py into a package. |
506 |
def __init__(self, client_medium): |
507 |
medium.SmartClientMediumRequest.__init__(self, client_medium) |
|
|
2018.2.8
by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again. |
508 |
self._buffer = '' |
509 |
||
510 |
def _accept_bytes(self, bytes): |
|
511 |
self._buffer += bytes |
|
512 |
||
513 |
def _finished_writing(self): |
|
514 |
data = self._medium.send_http_smart_request(self._buffer) |
|
515 |
self._response_body = data |
|
516 |
||
517 |
def _read_bytes(self, count): |
|
518 |
return self._response_body.read(count) |
|
|
2004.1.28
by v.ladeuil+lp at free
Merge bzr.dev. Including http modifications by "smart" related code |
519 |
|
|
2018.2.8
by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again. |
520 |
def _finished_reading(self): |
521 |
"""See SmartClientMediumRequest._finished_reading.""" |
|
522 |
pass
|