def _extract_headers(header_text, url):
    """Extract the mapping for an rfc2822 header

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    In the case that there are multiple headers inside the file,
    the last one is returned.

    :param header_text: A string of header information.
        This expects that the first line of a header will always be HTTP ...
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    """
    first_header = True
    remaining = header_text

    if not remaining:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    while remaining:
        header_file = StringIO(remaining)
        first_line = header_file.readline()
        if not first_line.startswith('HTTP'):
            if first_header: # The first header *must* start with HTTP
                raise errors.InvalidHttpResponse(url,
                    'Opening header line did not start with HTTP: %s'
                    % (first_line,))
            else:
                break # We are done parsing
        first_header = False
        m = mimetools.Message(header_file)

        # mimetools.Message parses the first header up to a blank line.
        # So while there is remaining data, it probably means there is
        # another header to be parsed.
        # Get rid of any preceding whitespace, which if it is all whitespace
        # will get rid of everything.
        remaining = header_file.read().lstrip()
    return m
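
# Illustrative usage sketch, not part of the original module: the header text
# and URL below are made up, but show the shape of the data _extract_headers
# expects and the case-insensitive lookups the returned mimetools.Message
# allows.
def _example_extract_headers_usage():
    msg = _extract_headers(
        'HTTP/1.1 200 OK\r\n'
        'Content-Type: text/plain\r\n'
        'Content-Length: 2\r\n'
        '\r\n',
        'http://example.com/file')
    # Keys are matched case-insensitively, so 'content-length' finds
    # 'Content-Length'.
    return msg['content-length']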


class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):
    """Base class for http implementations."""

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        # FIXME: some callers want an iterable... One step forward, three steps
        # backwards :-/ And not only an iterable, but an iterable that can be
        # seeked backwards, so we will never be able to do that. One such
        # known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the
        # time of this writing it's even the only known client -- vila20071203
        return StringIO(response_file.read())
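
    # Illustration (transport URL and path invented for the example):
    # wrapping the body in a StringIO is what lets callers seek backwards on
    # the result, e.g.:
    #
    #   t = get_transport('http://example.com/repo')
    #   f = t.get('inventory')
    #   f.read(10)
    #   f.seek(0)    # works because the whole body was read into memory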

    def _get(self, relpath, ranges, tail_amount=0):
        """Get a file, or part of a file."""
        raise NotImplementedError(self._get)
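
    # Grounded in the call sites in this class rather than in any concrete
    # implementation: subclasses are expected to return a
    # (http_code, file_like_object) pair, which callers unpack as
    # 'code, f = self._get(...)'.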

    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        The user and password are not embedded in the path provided to the
        server.
        """
        relative = urlutils.unescape(relpath).encode('utf-8')
        path = self._combine_paths(self._path, relative)
        return self._unsplit_url(self._unqualified_scheme,
                                 None, None, self._host, self._port, path)
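
    # Hypothetical illustration (URL invented for the example): for a
    # transport opened on 'http://joe:secret@example.com/repo',
    # _remote_path('doc/file.txt') combines '/repo' with 'doc/file.txt' and
    # rebuilds 'http://example.com/repo/doc/file.txt', with the user and
    # password deliberately left out of what is sent to the server.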

    def _create_auth(self):
        """Return a dict containing the credentials provided at build time."""
        auth = dict(host=self._host, port=self._port,
                    user=self._user, password=self._password,
                    protocol=self._unqualified_scheme)
        return auth
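
    # Shape of the returned mapping (illustrative values only): for a
    # transport opened on 'http://joe@example.com/repo' this would be
    # something like
    #   {'host': 'example.com', 'port': None, 'user': 'joe',
    #    'password': None, 'protocol': 'http'}
    # i.e. exactly the credential fields captured when the transport was
    # built.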

    def get_request(self):
        return SmartClientHTTPMediumRequest(self)

            # further tries were unsuccessful
            raise exc_info[0], exc_info[1], exc_info[2]

    def _get_ranges_hinted(self, relpath, ranges):
        """Issue a ranged GET request taking server capabilities into account.

        Depending on the errors returned by the server, we try several GET
        requests, trying to minimize the data transferred.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or a list of _CoalescedOffset to fetch parts of a file.
        :returns: A file handle containing at least the requested ranges.
        """
        try:
            code, f = self._get(relpath, ranges)
        except errors.InvalidRange, e:
            exc_info = sys.exc_info()
            self._degrade_range_hint(relpath, ranges, exc_info)
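
    # How the retries above are expected to degrade (inferred from the
    # 'multi'/'single'/None hints used by _coalesce_readv below; the exact
    # transitions live in _degrade_range_hint, which is not shown here):
    #
    #   'multi'  : several byte ranges per GET request
    #   'single' : at most one (spanning) range per GET request
    #   None     : no Range header at all, fetch the whole file
    #
    # Each errors.InvalidRange answer moves the hint one step down this
    # ladder before the GET is retried.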

    # _coalesce_offsets is a helper for readv, it tries to combine ranges without
    # degrading readv performance. _bytes_to_read_before_seek is the value
    # used for the limit parameter and has been tuned for other transports. For

    def _readv(self, relpath, offsets):

        # Cache the data read, but only until it's been used
        data_map = {}
        # We will iterate on the data received from the GET requests and
        # serve the corresponding offsets respecting the initial order. We
        # need an offset iterator for that.
        iter_offsets = iter(offsets)
        cur_offset_and_size = iter_offsets.next()

        try:
            for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                # Split the received chunk
                for offset, size in cur_coal.ranges:
                    start = cur_coal.start + offset
                    rfile.seek(start, 0)
                    data = rfile.read(size)
                    data_len = len(data)
                    if data_len != size:
                        raise errors.ShortReadvError(relpath, start, size,
                                                     actual=data_len)
                    if (start, size) == cur_offset_and_size:
                        # The offsets requested are sorted as the coalesced
                        # ones, no need to cache. Win !
                        yield cur_offset_and_size[0], data
                        cur_offset_and_size = iter_offsets.next()
                    else:
                        # Different sorting. We need to cache.
                        data_map[(start, size)] = data

                # Yield everything we can
                while cur_offset_and_size in data_map:
                    this_data = data_map.pop(cur_offset_and_size)
                    yield cur_offset_and_size[0], this_data
                    cur_offset_and_size = iter_offsets.next()

        except (errors.ShortReadvError, errors.InvalidRange,
                errors.InvalidHttpRange), e:
            mutter('Exception %r: %s during http._readv', e, e)
            if (not isinstance(e, errors.ShortReadvError)
                or retried_offset == cur_offset_and_size):
                # We don't degrade the range hint for ShortReadvError since
                # they do not indicate a problem with the server's ability to
                # handle ranges. Except when we fail to get back a required
                # offset twice in a row. In that case, falling back to
                # single range or whole file should help or end up in a
                # fatal exception.
                self._degrade_range_hint(relpath, coalesced, sys.exc_info())
            # Some offsets may have been already processed, so we retry
            # only the unsuccessful ones.
            offsets = [cur_offset_and_size] + [o for o in iter_offsets]
            retried_offset = cur_offset_and_size
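
    # Worked example (invented offsets): for _readv(relpath, [(4000, 100), (0, 100)])
    # the coalesced ranges are fetched in file order, so the chunk for
    # (0, 100) arrives before (4000, 100) is wanted. It is therefore parked
    # in data_map and only yielded after (4000, 100) has been served,
    # preserving the order the caller asked for.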

    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets"""

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size)
                    or len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    ranges = [coal]
                    cumul = coal.length
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile
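
    # Worked example (invented numbers): with _range_hint == 'multi',
    # _max_get_ranges == 2, _get_max_size == 0 and five coalesced offsets,
    # the loop above issues three GET requests covering 2, 2 and 1 ranges.
    # With _range_hint == 'single' (max_ranges == total) or None, a single
    # GET request is issued instead.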

    def recommended_page_size(self):
        """See Transport.recommended_page_size()."""

    @deprecated_method(zero_seventeen)
    def offsets_to_ranges(offsets):
        """Turn a list of offsets and sizes into a list of byte ranges.

        :param offsets: A list of tuples of (start, size). An empty list
            is allowed.
        :return: a list of inclusive byte ranges (start, end).
            Adjacent ranges will be combined.
        """
        # Make sure we process sorted offsets
        offsets = sorted(offsets)

        prev_end = None
        combined = []

        for start, size in offsets:
            end = start + size - 1
            if prev_end is None:
                combined.append([start, end])
            elif start <= prev_end + 1:
                combined[-1][1] = end
            else:
                combined.append([start, end])
            prev_end = end

        return combined
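
    # Worked example: offsets_to_ranges([(0, 5), (5, 5), (20, 4)]) returns
    # [[0, 9], [20, 23]] -- the first two offsets touch, so their inclusive
    # byte ranges are merged, while (20, 4) starts a new range.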

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.