57
59
Dummy implementation for consistency with the 'file' API.
63
def __exit__(self, exc_type, exc_val, exc_tb):
64
return False # propogate exceptions.
66
def read(self, size=None):
62
def read(self, size=-1):
67
63
"""Read size bytes from the current position in the file.
69
65
:param size: The number of bytes to read. Leave unspecified or pass
72
data = self._file.read(size)
68
data = self._file.read(size)
73
69
self._pos += len(data)
168
158
The file should be at the beginning of the body, the first range
169
159
definition is read and taken into account.
171
if not isinstance(boundary, bytes):
172
raise TypeError(boundary)
173
161
self._boundary = boundary
174
162
# Decode the headers and set up the first range
175
163
self.read_boundary()
178
166
def read_boundary(self):
179
167
"""Read the boundary headers defining a new range"""
180
boundary_line = b'\r\n'
181
while boundary_line == b'\r\n':
168
boundary_line = '\r\n'
169
while boundary_line == '\r\n':
182
170
# RFC2616 19.2 Additional CRLFs may precede the first boundary
184
172
# To be on the safe side we allow it before any boundary line
185
173
boundary_line = self._file.readline()
187
if boundary_line == b'':
175
if boundary_line == '':
188
176
# A timeout in the proxy server caused the response to end early.
189
177
# See launchpad bug 198646.
190
178
raise errors.HttpBoundaryMissing(
194
if boundary_line != b'--' + self._boundary + b'\r\n':
195
# email_utils.unquote() incorrectly unquotes strings enclosed in <>
182
if boundary_line != '--' + self._boundary + '\r\n':
183
# rfc822.unquote() incorrectly unquotes strings enclosed in <>
196
184
# IIS 6 and 7 incorrectly wrap boundary strings in <>
197
185
# together they make a beautiful bug, which we will be gracious
199
187
if (self._unquote_boundary(boundary_line) !=
200
b'--' + self._boundary + b'\r\n'):
188
'--' + self._boundary + '\r\n'):
201
189
raise errors.InvalidHttpResponse(
203
191
"Expected a boundary (%s) line, got '%s'"
204
192
% (self._boundary, boundary_line))
206
194
def _unquote_boundary(self, b):
207
return b[:2] + email_utils.unquote(b[2:-2].decode('ascii')).encode('ascii') + b[-2:]
195
return b[:2] + rfc822.unquote(b[2:-2]) + b[-2:]
209
197
def read_range_definition(self):
210
198
"""Read a new range definition in a multi parts message.
212
200
Parse the headers including the empty line following them so that we
213
201
are ready to read the data itself.
215
self._headers = http_client.parse_headers(self._file)
203
self._headers = httplib.HTTPMessage(self._file, seekable=0)
216
204
# Extract the range definition
217
content_range = self._headers.get('content-range', None)
205
content_range = self._headers.getheader('content-range', None)
218
206
if content_range is None:
219
207
raise errors.InvalidHttpResponse(
351
339
cur_limit = self._start + self._size
353
341
size = final_pos - self._pos
354
if size > 0: # size can be < 0 if we crossed a range boundary
342
if size > 0: # size can be < 0 if we crossed a range boundary
355
343
# We don't need the data, just read it and throw it away
356
344
self._checked_read(size)
362
def handle_response(url, code, getheader, data):
350
def handle_response(url, code, msg, data):
363
351
"""Interpret the code & headers and wrap the provided data in a RangeFile.
365
353
This is a factory method which returns an appropriate RangeFile based on
368
356
:param url: The url being processed. Mostly for error reporting
369
357
:param code: The integer HTTP response code
370
:param getheader: Function for retrieving header
358
:param msg: An HTTPMessage containing the headers for the response
371
359
:param data: A file-like object that can be read() to get the
373
361
:return: A file-like object that can seek()+read() the
378
366
rfile = ResponseFile(url, data)
379
367
elif code == 206:
380
368
rfile = RangeFile(url, data)
381
# When there is no content-type header we treat the response as
382
# being of type 'application/octet-stream' as per RFC2616 section
384
# Therefore it is obviously not multipart
385
content_type = getheader('content-type', 'application/octet-stream')
386
mimetype, options = cgi.parse_header(content_type)
387
if mimetype == 'multipart/byteranges':
388
rfile.set_boundary(options['boundary'].encode('ascii'))
369
content_type = msg.getheader('content-type', None)
370
if content_type is None:
371
# When there is no content-type header we treat the response as
372
# being of type 'application/octet-stream' as per RFC2616 section
374
# Therefore it is obviously not multipart
375
content_type = 'application/octet-stream'
378
is_multipart = (msg.getmaintype() == 'multipart'
379
and msg.getsubtype() == 'byteranges')
382
# Full fledged multipart response
383
rfile.set_boundary(msg.getparam('boundary'))
390
385
# A response to a range request, but not multipart
391
content_range = getheader('content-range', None)
386
content_range = msg.getheader('content-range', None)
392
387
if content_range is None:
393
raise errors.InvalidHttpResponse(
394
url, 'Missing the Content-Range header in a 206 range response')
388
raise errors.InvalidHttpResponse(url,
389
'Missing the Content-Range header in a 206 range response')
395
390
rfile.set_range_from_header(content_range)
397
raise errors.UnexpectedHttpStatus(url, code)
392
raise errors.InvalidHttpResponse(url,
393
'Unknown response code %s' % code)