bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5609.52.1
by Martin Pool
Cope with buggy squids interrupting the response before a mime multipart boundary |
1 |
# Copyright (C) 2006-2011 Canonical Ltd
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
16 |
|
17 |
"""Handlers for HTTP Responses.
|
|
18 |
||
19 |
The purpose of these classes is to provide a uniform interface for clients
|
|
20 |
to standard HTTP responses, single range responses and multipart range
|
|
21 |
responses.
|
|
22 |
"""
|
|
23 |
||
7296.2.5
by Jelmer Vernooij
More avoidance of urllib2-specific things. |
24 |
import cgi |
7479.2.1
by Jelmer Vernooij
Drop python2 support. |
25 |
from io import BytesIO |
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
26 |
import os |
7479.2.1
by Jelmer Vernooij
Drop python2 support. |
27 |
import http.client as http_client |
28 |
import email.utils as email_utils |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
29 |
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
30 |
from ... import ( |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
31 |
errors, |
3408.6.1
by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while |
32 |
osutils, |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
33 |
)
|
34 |
||
35 |
||
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
36 |
class ResponseFile(object):
    """A wrapper around the http socket containing the result of a GET request.

    Only read() and seek() (forward) are supported.

    The wrapper keeps its own count of the bytes consumed so far (``_pos``)
    because the wrapped file is only ever read sequentially; seeking is
    simulated by reading and discarding data.
    """

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        self._path = path
        self._file = infile
        # Number of bytes consumed from infile so far.
        self._pos = 0

    def close(self):
        """Close this file.

        Dummy implementation for consistency with the 'file' API.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        return False  # propagate exceptions.

    def read(self, size=None):
        """Read size bytes from the current position in the file.

        :param size: The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :return: The data returned by the wrapped file; the position is
            advanced by the length of that data.
        """
        data = self._file.read(size)
        self._pos += len(data)
        return data

    def readline(self):
        """Read one line from the wrapped file and advance the position."""
        data = self._file.readline()
        self._pos += len(data)
        return data

    def __iter__(self):
        """Yield the remaining lines until readline() returns nothing."""
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def tell(self):
        """Return the number of bytes consumed so far."""
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward in the file by reading and discarding data.

        :param offset: Target position (os.SEEK_SET) or number of bytes to
            skip (os.SEEK_CUR).
        :param whence: os.SEEK_SET (default) or os.SEEK_CUR.
        :raises AssertionError: If the seek would go backwards or if whence
            is not one of the supported values.
        """
        if whence == os.SEEK_SET:
            if offset < self._pos:
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset - self._pos
        elif whence == os.SEEK_CUR:
            if offset < 0:
                # A negative count handed to read() would mean "read to
                # EOF", silently draining the stream instead of failing.
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset
        else:
            # Seeking relative to the end (or any other mode) cannot be
            # simulated on a forward-only stream.
            raise AssertionError(
                "Unsupported whence value: %s" % (whence,))
        if to_discard:
            # Just discard the unwanted bytes
            self.read(to_discard)
|
105 |
||
3059.2.18
by Vincent Ladeuil
Take spiv review comments into account. |
106 |
# A RangeFile expects the following grammar (simplified to outline the
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
107 |
# assumptions we rely upon).
|
108 |
||
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
109 |
# file: single_range
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
110 |
# | multiple_range
|
111 |
||
112 |
# single_range: content_range_header data
|
|
113 |
||
114 |
# multiple_range: boundary_header boundary (content_range_header data boundary)+
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
115 |
|
7143.15.2
by Jelmer Vernooij
Run autopep8. |
116 |
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
117 |
class RangeFile(ResponseFile):
    """File-like object that allows access to partially available data.

    All accesses should happen sequentially since the acquisition occurs during
    an http response reception (as sockets can't be seeked, we simulate the
    seek by just reading and discarding the data).

    The access pattern is defined by a set of ranges discovered as reading
    progress. Only one range is available at a given time, so all accesses
    should happen with monotonically increasing offsets.
    """

    # in _checked_read() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a BytesIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    # maximum size of read requests -- used to avoid MemoryError issues in recv
    _max_read_size = 512 * 1024

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        super().__init__(path, infile)
        self._boundary = None
        # When using multi parts response, this will be set with the headers
        # associated with the range currently read.
        self._headers = None
        # Default to the whole file of unspecified size
        self.set_range(0, -1)

    def set_range(self, start, size):
        """Change the range mapping

        :param start: Offset in the file of the first byte of the range.
        :param size: Number of bytes in the range; values <= 0 are treated as
            "unknown size" by read() and seek().
        """
        self._start = start
        self._size = size
        # Set the new _pos since that's what we want to expose
        self._pos = self._start

    def set_boundary(self, boundary):
        """Define the boundary used in a multi parts message.

        The file should be at the beginning of the body, the first range
        definition is read and taken into account.

        :param boundary: The boundary, as bytes.
        :raises TypeError: If boundary is not a bytes object.
        """
        if not isinstance(boundary, bytes):
            raise TypeError(boundary)
        self._boundary = boundary
        # Decode the headers and setup the first range
        self.read_boundary()
        self.read_range_definition()

    def read_boundary(self):
        """Read the boundary headers defining a new range

        :raises errors.HttpBoundaryMissing: If the response ends before a
            boundary line is found.
        :raises errors.InvalidHttpResponse: If the line read is not the
            expected boundary line.
        """
        boundary_line = b'\r\n'
        while boundary_line == b'\r\n':
            # RFC2616 19.2 Additional CRLFs may precede the first boundary
            # string entity.
            # To be on the safe side we allow it before any boundary line
            boundary_line = self._file.readline()

        if boundary_line == b'':
            # A timeout in the proxy server caused the response to end early.
            # See launchpad bug 198646.
            raise errors.HttpBoundaryMissing(
                self._path,
                self._boundary)

        expected = b'--' + self._boundary + b'\r\n'
        if boundary_line != expected:
            # email_utils.unquote() incorrectly unquotes strings enclosed in <>
            # IIS 6 and 7 incorrectly wrap boundary strings in <>
            # together they make a beautiful bug, which we will be gracious
            # about here
            try:
                unquoted = self._unquote_boundary(boundary_line)
            except UnicodeError:
                # A line holding non-ASCII bytes can't be unquoted; report
                # it as an invalid boundary rather than leaking the decoding
                # error to the caller.
                unquoted = None
            if unquoted != expected:
                raise errors.InvalidHttpResponse(
                    self._path,
                    "Expected a boundary (%s) line, got '%s'"
                    % (self._boundary, boundary_line))

    def _unquote_boundary(self, b):
        """Unquote the boundary part of a boundary line.

        The first two and last two bytes of the line are kept as they are;
        only what lies between them goes through email_utils.unquote().
        """
        return (b[:2]
                + email_utils.unquote(b[2:-2].decode('ascii')).encode('ascii')
                + b[-2:])

    def read_range_definition(self):
        """Read a new range definition in a multi parts message.

        Parse the headers including the empty line following them so that we
        are ready to read the data itself.

        :raises errors.InvalidHttpResponse: If the part carries no
            Content-Range header.
        """
        self._headers = http_client.parse_headers(self._file)
        # Extract the range definition
        content_range = self._headers.get('content-range', None)
        if content_range is None:
            raise errors.InvalidHttpResponse(
                self._path,
                'Content-Range header missing in a multi-part response')
        self.set_range_from_header(content_range)

    def set_range_from_header(self, content_range):
        """Helper to set the new range from its description in the headers

        :param content_range: Value of a Content-Range header, expected to
            look like 'bytes <start>-<end>/<total>'.
        :raises errors.InvalidHttpRange: If the value can't be parsed, uses a
            unit other than 'bytes' or describes an empty range.
        """
        try:
            rtype, values = content_range.split()
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Malformed header')
        if rtype != 'bytes':
            raise errors.InvalidHttpRange(self._path, content_range,
                                          "Unsupported range type '%s'" % rtype)
        try:
            # We don't need total, but note that it may be either the file size
            # or '*' if the server can't or doesn't want to return the file
            # size.
            start_end, total = values.split('/')
            start, end = start_end.split('-')
            start = int(start)
            end = int(end)
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range values')
        # Both ends of the range are inclusive.
        size = end - start + 1
        if size <= 0:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range, size <= 0')
        self.set_range(start, size)

    def _checked_read(self, size):
        """Read the file checking for short reads.

        The data read is discarded along the way.

        :param size: Number of bytes to read and discard.
        :raises errors.ShortReadvError: If the file runs out of data before
            size bytes have been read.
        """
        pos = self._pos
        remaining = size
        while remaining > 0:
            data = self._file.read(min(remaining, self._discarded_buf_size))
            remaining -= len(data)
            if not data:
                raise errors.ShortReadvError(self._path, pos, size,
                                             size - remaining)
        self._pos += size

    def _seek_to_next_range(self):
        """Move to the next range of a multi parts response.

        :raises errors.InvalidRange: If no boundary has been set, in which
            case there is no other range to move to.
        """
        # We will cross range boundaries
        if self._boundary is None:
            # If we don't have a boundary, we can't find another range
            raise errors.InvalidRange(self._path, self._pos,
                                      "Range (%s, %s) exhausted"
                                      % (self._start, self._size))
        self.read_boundary()
        self.read_range_definition()

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        Reading across ranges is not supported. We rely on the underlying http
        client to clean the socket if we leave bytes unread. This may occur for
        the final boundary line of a multipart response or for any range
        request not entirely consumed by the client (due to offset coalescing)

        :param size:  The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :raises errors.InvalidRange: If the read starts before the current
            range or would go past its end.
        """
        if (self._size > 0
                and self._pos == self._start + self._size):
            # The current range is exhausted.
            if size == 0:
                return b''
            else:
                self._seek_to_next_range()
        elif self._pos < self._start:
            raise errors.InvalidRange(
                self._path, self._pos,
                "Can't read %s bytes before range (%s, %s)"
                % (size, self._start, self._size))
        if self._size > 0:
            if size > 0 and self._pos + size > self._start + self._size:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "Can't read %s bytes across range (%s, %s)"
                    % (size, self._start, self._size))

        # read data from file
        buf = BytesIO()
        limited = size
        if self._size > 0:
            # Don't read past the range definition
            limited = self._start + self._size - self._pos
            if size >= 0:
                limited = min(limited, size)
        osutils.pumpfile(self._file, buf, limited, self._max_read_size)
        data = buf.getvalue()

        # Update _pos respecting the data effectively read
        self._pos += len(data)
        return data

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward to a new position, crossing ranges if needed.

        The data between the current and the final position is read and
        discarded.

        :param offset: Position, relative to what whence designates.
        :param whence: os.SEEK_SET (default), os.SEEK_CUR or os.SEEK_END (the
            latter is relative to the end of the current range and needs its
            size to be known).
        :raises ValueError: If whence is not one of the values above.
        :raises errors.InvalidRange: If the seek would go backwards, if
            seeking from the end while the size is unknown, or if no further
            range is available.
        """
        if whence == os.SEEK_SET:
            final_pos = offset
        elif whence == os.SEEK_CUR:
            final_pos = self._pos + offset
        elif whence == os.SEEK_END:
            if self._size > 0:
                final_pos = self._start + self._size + offset  # offset < 0
            else:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "RangeFile: can't seek from end while size is unknown")
        else:
            raise ValueError("Invalid value %s for whence." % whence)

        if final_pos < self._pos:
            # Can't seek backwards
            raise errors.InvalidRange(
                self._path, self._pos,
                'RangeFile: trying to seek backwards to %s' % final_pos)

        if self._size > 0:
            cur_limit = self._start + self._size
            while final_pos > cur_limit:
                # We will cross range boundaries
                remain = cur_limit - self._pos
                if remain > 0:
                    # Finish reading the current range
                    self._checked_read(remain)
                self._seek_to_next_range()
                cur_limit = self._start + self._size

        size = final_pos - self._pos
        if size > 0:  # size can be < 0 if we crossed a range boundary
            # We don't need the data, just read it and throw it away
            self._checked_read(size)

    def tell(self):
        """Return the current position in the file."""
        return self._pos
|
355 |
||
1786.1.5
by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object. |
356 |
|
7296.2.5
by Jelmer Vernooij
More avoidance of urllib2-specific things. |
357 |
def handle_response(url, code, getheader, data):
    """Interpret the code & headers and wrap the provided data in a RangeFile.

    This is a factory method which returns an appropriate RangeFile based on
    the code & headers it's given.

    :param url: The url being processed. Mostly for error reporting
    :param code: The integer HTTP response code
    :param getheader: Function for retrieving header
    :param data: A file-like object that can be read() to get the
                 requested data
    :return: A file-like object that can seek()+read() the
             ranges indicated by the headers.
    :raises errors.InvalidHttpResponse: If the code is neither 200 nor 206,
        if a multipart response doesn't define a usable boundary or if a
        single range response has no Content-Range header.
    """
    if code == 200:
        # A whole file
        rfile = ResponseFile(url, data)
    elif code == 206:
        rfile = RangeFile(url, data)
        # When there is no content-type header we treat the response as
        # being of type 'application/octet-stream' as per RFC2616 section
        # 7.2.1.
        # Therefore it is obviously not multipart
        content_type = getheader('content-type', 'application/octet-stream')
        # NOTE(review): the cgi module was removed from the standard library
        # in Python 3.13 (PEP 594); this call needs a replacement before
        # targeting that version.
        mimetype, options = cgi.parse_header(content_type)
        if mimetype == 'multipart/byteranges':
            boundary = options.get('boundary')
            if boundary is None:
                # Without a boundary the parts can't be told apart; report
                # the broken response instead of failing with a KeyError.
                raise errors.InvalidHttpResponse(
                    url,
                    'Missing the boundary parameter in a '
                    'multipart/byteranges response')
            try:
                boundary = boundary.encode('ascii')
            except UnicodeEncodeError:
                raise errors.InvalidHttpResponse(
                    url,
                    'Non-ASCII boundary in a multipart/byteranges response')
            rfile.set_boundary(boundary)
        else:
            # A response to a range request, but not multipart
            content_range = getheader('content-range', None)
            if content_range is None:
                raise errors.InvalidHttpResponse(
                    url, 'Missing the Content-Range header in a 206 range response')
            rfile.set_range_from_header(content_range)
    else:
        raise errors.InvalidHttpResponse(url,
                                         'Unknown response code %s' % code)

    return rfile