bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
5609.52.1
by Martin Pool
Cope with buggy squids interrupting the response before a mime multipart boundary |
1 |
# Copyright (C) 2006-2011 Canonical Ltd
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
16 |
|
17 |
"""Handlers for HTTP Responses.
|
|
18 |
||
19 |
The purpose of these classes is to provide a uniform interface for clients
|
|
20 |
to standard HTTP responses, single range responses and multipart range
|
|
21 |
responses.
|
|
22 |
"""
|
|
23 |
||
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
24 |
from __future__ import absolute_import |
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
25 |
|
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
26 |
import os |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
27 |
import httplib |
|
3535.1.2
by Adrian Wilkins
Fix ability to use IIS as a dumb HTTP server. |
28 |
import rfc822 |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
29 |
|
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
30 |
from ... import ( |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
31 |
errors, |
|
3408.6.1
by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while |
32 |
osutils, |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
33 |
)
|
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
34 |
from ...sixish import ( |
|
6621.22.2
by Martin
Use BytesIO or StringIO from bzrlib.sixish |
35 |
BytesIO, |
36 |
)
|
|
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
37 |
|
38 |
||
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
39 |
class ResponseFile(object):
    """A wrapper around the http socket containing the result of a GET request.

    Only read(), readline(), iteration over lines, tell() and forward seek()
    are supported. The current position is tracked by counting the bytes
    handed out, since the underlying file is not asked for its position.
    """

    # Forward seeks are simulated by reading and throwing the data away. To
    # avoid buffering a potentially large span in memory, the unwanted bytes
    # are read by chunks of at most this size.
    _discarded_buf_size = 8192

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        self._path = path
        self._file = infile
        self._pos = 0

    def close(self):
        """Close this file.

        Dummy implementation for consistency with the 'file' API.
        """

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        :param size: The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :return: The data read; the position advances by its length.
        """
        data = self._file.read(size)
        self._pos += len(data)
        return data

    def readline(self):
        """Read one line from the underlying file and advance the position.

        :return: Whatever the underlying file's readline() returned.
        """
        data = self._file.readline()
        self._pos += len(data)
        return data

    def __iter__(self):
        """Yield lines until readline() returns an empty value."""
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def tell(self):
        """Return the number of bytes consumed so far through this wrapper."""
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward in the file by reading and discarding data.

        :param offset: Absolute position for os.SEEK_SET, number of bytes to
            skip for os.SEEK_CUR.
        :param whence: os.SEEK_SET (default) or os.SEEK_CUR.
        :raises AssertionError: If the seek would go backwards or if whence
            is neither os.SEEK_SET nor os.SEEK_CUR.
        """
        if whence == os.SEEK_SET:
            if offset < self._pos:
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset - self._pos
        elif whence == os.SEEK_CUR:
            if offset < 0:
                # A negative count must never reach read(): there it means
                # 'read to EOF', which would silently consume the rest of
                # the response instead of refusing the backward seek.
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset
        else:
            # Any other whence (e.g. os.SEEK_END) is unsupported; the message
            # is kept as is for callers that may rely on it.
            raise AssertionError("Can't seek backwards")
        # Just discard the unwanted bytes, chunk by chunk. Reaching EOF early
        # is not an error here: the position simply stops at EOF.
        while to_discard > 0:
            data = self.read(min(to_discard, self._discarded_buf_size))
            if not data:
                break
            to_discard -= len(data)
|
101 |
||
|
3059.2.18
by Vincent Ladeuil
Take spiv review comments into account. |
102 |
# A RangeFile expects the following grammar (simplified to outline the
|
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
103 |
# assumptions we rely upon).
|
104 |
||
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
105 |
# file: single_range
|
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
106 |
# | multiple_range
|
107 |
||
108 |
# single_range: content_range_header data
|
|
109 |
||
110 |
# multiple_range: boundary_header boundary (content_range_header data boundary)+
|
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
111 |
|
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
112 |
class RangeFile(ResponseFile):
    """File-like object that allows access to partially available data.

    All accesses should happen sequentially since the acquisition occurs during
    an http response reception (as sockets can't be seeked, we simulate the
    seek by just reading and discarding the data).

    The access pattern is defined by a set of ranges discovered as reading
    progresses. Only one range is available at a given time, so all accesses
    should happen with monotonically increasing offsets.
    """

    # in _checked_read() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a BytesIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    # maximum size of read requests -- used to avoid MemoryError issues in recv
    _max_read_size = 512 * 1024

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        super(RangeFile, self).__init__(path, infile)
        self._boundary = None
        # When using multi parts response, this will be set with the headers
        # associated with the range currently read.
        self._headers = None
        # Default to the whole file of unspecified size
        self.set_range(0, -1)

    def set_range(self, start, size):
        """Change the range mapping.

        :param start: Offset in the remote file of the first byte of the
            range.
        :param size: Number of bytes in the range; a value <= 0 is treated by
            read() and seek() as 'size unknown'.
        """
        self._start = start
        self._size = size
        # Set the new _pos since that's what we want to expose
        self._pos = self._start

    def set_boundary(self, boundary):
        """Define the boundary used in a multi parts message.

        The file should be at the beginning of the body, the first range
        definition is read and taken into account.

        :param boundary: The boundary string, without the leading '--'.
        """
        self._boundary = boundary
        # Decode the headers and setup the first range
        self.read_boundary()
        self.read_range_definition()

    def read_boundary(self):
        """Read the boundary headers defining a new range.

        :raises errors.HttpBoundaryMissing: If the response ends before a
            boundary line is found.
        :raises errors.InvalidHttpResponse: If the line found is not the
            expected boundary, even after unquoting it.
        """
        boundary_line = '\r\n'
        while boundary_line == '\r\n':
            # RFC2616 19.2 Additional CRLFs may precede the first boundary
            # string entity.
            # To be on the safe side we allow it before any boundary line
            boundary_line = self._file.readline()

        if boundary_line == '':
            # A timeout in the proxy server caused the response to end early.
            # See launchpad bug 198646.
            raise errors.HttpBoundaryMissing(
                self._path,
                self._boundary)

        expected = '--' + self._boundary + '\r\n'
        if boundary_line != expected:
            # rfc822.unquote() incorrectly unquotes strings enclosed in <>
            # IIS 6 and 7 incorrectly wrap boundary strings in <>
            # together they make a beautiful bug, which we will be gracious
            # about here
            if self._unquote_boundary(boundary_line) != expected:
                raise errors.InvalidHttpResponse(
                    self._path,
                    "Expected a boundary (%s) line, got '%s'"
                    % (self._boundary, boundary_line))

    def _unquote_boundary(self, b):
        """Unquote the boundary part of a boundary line.

        The first two characters ('--' in a well formed line) and the last
        two (the line terminator) are kept as is; only what lies in between
        goes through rfc822.unquote().
        """
        return b[:2] + rfc822.unquote(b[2:-2]) + b[-2:]

    def read_range_definition(self):
        """Read a new range definition in a multi parts message.

        Parse the headers including the empty line following them so that we
        are ready to read the data itself.

        :raises errors.InvalidHttpResponse: If the part has no Content-Range
            header.
        """
        self._headers = httplib.HTTPMessage(self._file, seekable=0)
        # Extract the range definition
        content_range = self._headers.getheader('content-range', None)
        if content_range is None:
            raise errors.InvalidHttpResponse(
                self._path,
                'Content-Range header missing in a multi-part response')
        self.set_range_from_header(content_range)

    def set_range_from_header(self, content_range):
        """Helper to set the new range from its description in the headers.

        :param content_range: Value of a Content-Range header, expected to
            look like 'bytes <start>-<end>/<total>'.
        :raises errors.InvalidHttpRange: If the value cannot be parsed, is not
            expressed in bytes or describes an empty or negative range.
        """
        try:
            rtype, values = content_range.split()
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Malformed header')
        if rtype != 'bytes':
            raise errors.InvalidHttpRange(self._path, content_range,
                                          "Unsupported range type '%s'" % rtype)
        try:
            # We don't need total, but note that it may be either the file size
            # or '*' if the server can't or doesn't want to return the file
            # size.
            start_end, total = values.split('/')
            start, end = start_end.split('-')
            start = int(start)
            end = int(end)
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range values')
        # Both ends of the range are inclusive
        size = end - start + 1
        if size <= 0:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range, size <= 0')
        self.set_range(start, size)

    def _checked_read(self, size):
        """Read the file checking for short reads.

        The data read is discarded along the way.

        :param size: Number of bytes to read and discard.
        :raises errors.ShortReadvError: If the file ends before size bytes
            could be read; the position is then left untouched.
        """
        pos = self._pos
        remaining = size
        while remaining > 0:
            data = self._file.read(min(remaining, self._discarded_buf_size))
            remaining -= len(data)
            if not data:
                raise errors.ShortReadvError(self._path, pos, size,
                                             size - remaining)
        self._pos += size

    def _seek_to_next_range(self):
        """Position the file at the start of the data of the next range.

        :raises errors.InvalidRange: If no boundary is defined, i.e. there is
            no other range to move to.
        """
        # We will cross range boundaries
        if self._boundary is None:
            # If we don't have a boundary, we can't find another range
            raise errors.InvalidRange(self._path, self._pos,
                                      "Range (%s, %s) exhausted"
                                      % (self._start, self._size))
        self.read_boundary()
        self.read_range_definition()

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        Reading across ranges is not supported. We rely on the underlying http
        client to clean the socket if we leave bytes unread. This may occur for
        the final boundary line of a multipart response or for any range
        request not entirely consumed by the client (due to offset coalescing)

        :param size: The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :return: The bytes read; the position advances by their length.
        :raises errors.InvalidRange: If the position is before the current
            range or if the read would cross the end of the current range.
        """
        if (self._size > 0
                and self._pos == self._start + self._size):
            if size == 0:
                # Same type as the BytesIO content returned by every other
                # path (on python 2, b'' and '' are the same object type).
                return b''
            else:
                self._seek_to_next_range()
        elif self._pos < self._start:
            raise errors.InvalidRange(
                self._path, self._pos,
                "Can't read %s bytes before range (%s, %s)"
                % (size, self._start, self._size))
        if self._size > 0:
            if size > 0 and self._pos + size > self._start + self._size:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "Can't read %s bytes across range (%s, %s)"
                    % (size, self._start, self._size))

        # read data from file
        buf = BytesIO()
        limited = size
        if self._size > 0:
            # Don't read past the range definition
            limited = self._start + self._size - self._pos
            if size >= 0:
                limited = min(limited, size)
        osutils.pumpfile(self._file, buf, limited, self._max_read_size)
        data = buf.getvalue()

        # Update _pos respecting the data effectively read
        self._pos += len(data)
        return data

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward to a new position, crossing ranges if needed.

        The data between the current and the new position is read and
        discarded.

        :param offset: Position, interpreted according to whence.
        :param whence: os.SEEK_SET (absolute), os.SEEK_CUR (relative to the
            current position) or os.SEEK_END (relative to the end of the
            current range, only when its size is known).
        :raises ValueError: If whence is none of the above.
        :raises errors.InvalidRange: When seeking backwards, or from the end
            of a range of unknown size.
        """
        start_pos = self._pos
        if whence == os.SEEK_SET:
            final_pos = offset
        elif whence == os.SEEK_CUR:
            final_pos = start_pos + offset
        elif whence == os.SEEK_END:
            if self._size > 0:
                final_pos = self._start + self._size + offset  # offset < 0
            else:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "RangeFile: can't seek from end while size is unknown")
        else:
            raise ValueError("Invalid value %s for whence." % whence)

        if final_pos < self._pos:
            # Can't seek backwards
            raise errors.InvalidRange(
                self._path, self._pos,
                'RangeFile: trying to seek backwards to %s' % final_pos)

        if self._size > 0:
            cur_limit = self._start + self._size
            while final_pos > cur_limit:
                # We will cross range boundaries
                remain = cur_limit - self._pos
                if remain > 0:
                    # Finish reading the current range
                    self._checked_read(remain)
                self._seek_to_next_range()
                cur_limit = self._start + self._size

        size = final_pos - self._pos
        if size > 0:  # size can be < 0 if we crossed a range boundary
            # We don't need the data, just read it and throw it away
            self._checked_read(size)

    def tell(self):
        """Return the current position, expressed as a remote file offset."""
        return self._pos
|
348 |
||
|
1786.1.5
by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object. |
349 |
|
|
3945.1.8
by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking. |
350 |
def handle_response(url, code, msg, data):
    """Interpret the code & headers and wrap the provided data in a file.

    This is a factory which picks the wrapper matching the response: a
    ResponseFile for a whole file (200), a RangeFile for a single or
    multipart range response (206).

    :param url: The url being processed. Mostly for error reporting
    :param code: The integer HTTP response code
    :param msg: An HTTPMessage containing the headers for the response
    :param data: A file-like object that can be read() to get the
                 requested data
    :return: A file-like object that can seek()+read() the
             ranges indicated by the headers.
    :raises errors.InvalidHttpResponse: For any code other than 200 and 206,
        or for a non-multipart 206 response without a Content-Range header.
    """
    if code == 200:
        # A whole file
        return ResponseFile(url, data)
    if code != 206:
        raise errors.InvalidHttpResponse(url,
                                         'Unknown response code %s' % code)

    range_file = RangeFile(url, data)
    # When there is no content-type header we treat the response as being of
    # type 'application/octet-stream' as per RFC2616 section 7.2.1.
    # Therefore it is obviously not multipart and the message is not asked
    # for its main type and subtype.
    has_content_type = msg.getheader('content-type', None) is not None
    multipart = (has_content_type
                 and msg.getmaintype() == 'multipart'
                 and msg.getsubtype() == 'byteranges')

    if multipart:
        # Full fledged multipart response
        range_file.set_boundary(msg.getparam('boundary'))
        return range_file

    # A response to a range request, but not multipart
    range_header = msg.getheader('content-range', None)
    if range_header is None:
        raise errors.InvalidHttpResponse(
            url,
            'Missing the Content-Range header in a 206 range response')
    range_file.set_range_from_header(range_header)
    return range_file
|
|
1786.1.21
by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers. |
396 |