1
# Copyright (C) 2006, 2007 Canonical Ltd
1
# Copyright (C) 2006-2011 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
26
from cStringIO import StringIO
24
from __future__ import absolute_import
28
import http.client as http_client
29
except ImportError: # python < 3
30
import httplib as http_client
32
import email.utils as email_utils
33
except ImportError: # python < 3
34
import rfc822 as email_utils
40
from ...sixish import (
45
class ResponseFile(object):
46
"""A wrapper around the http socket containing the result of a GET request.
48
Only read() and seek() (forward) are supported.
51
def __init__(self, path, infile):
54
:param path: File url, for error reports.
56
:param infile: File-like socket set at body start.
65
Dummy implementation for consistency with the 'file' API.
68
def read(self, size=-1):
69
"""Read size bytes from the current position in the file.
71
:param size: The number of bytes to read. Leave unspecified or pass
74
data = self._file.read(size)
75
self._pos += len(data)
79
data = self._file.readline()
80
self._pos += len(data)
85
line = self.readline()
93
def seek(self, offset, whence=os.SEEK_SET):
94
if whence == os.SEEK_SET:
95
if offset < self._pos:
97
"Can't seek backwards, pos: %s, offset: %s"
98
% (self._pos, offset))
99
to_discard = offset - self._pos
100
elif whence == os.SEEK_CUR:
103
raise AssertionError("Can't seek backwards")
105
# Just discard the unwanted bytes
106
self.read(to_discard)
36
108
# A RangeFile expects the following grammar (simplified to outline the
37
109
# assumptions we rely upon).
41
112
# | multiple_range
43
# whole_file: [content_length_header] data
45
114
# single_range: content_range_header data
47
116
# multiple_range: boundary_header boundary (content_range_header data boundary)+
49
class RangeFile(object):
118
class RangeFile(ResponseFile):
50
119
"""File-like object that allows access to partially available data.
52
121
All accesses should happen sequentially since the acquisition occurs during
61
130
# In _checked_read() below, we may have to discard several MB in the worst
62
131
# case. To avoid buffering that much, we read and discard by chunks
63
# instead. The underlying file is either a socket or a StringIO, so reading
132
# instead. The underlying file is either a socket or a BytesIO, so reading
64
133
# 8k chunks should be fine.
65
134
_discarded_buf_size = 8192
73
142
:param path: File url, for error reports.
74
144
:param infile: File-like socket set at body start.
146
super(RangeFile, self).__init__(path, infile)
78
147
self._boundary = None
79
148
# When using multi parts response, this will be set with the headers
80
149
# associated with the range currently read.
109
178
# To be on the safe side we allow it before any boundary line
110
179
boundary_line = self._file.readline()
181
if boundary_line == '':
182
# A timeout in the proxy server caused the response to end early.
183
# See launchpad bug 198646.
184
raise errors.HttpBoundaryMissing(
112
188
if boundary_line != '--' + self._boundary + '\r\n':
113
# rfc822.unquote() incorrectly unquotes strings enclosed in <>
189
# email_utils.unquote() incorrectly unquotes strings enclosed in <>
114
190
# IIS 6 and 7 incorrectly wrap boundary strings in <>
115
191
# together they make a beautiful bug, which we will be gracious
122
198
% (self._boundary, boundary_line))
124
200
def _unquote_boundary(self, b):
125
return b[:2] + rfc822.unquote(b[2:-2]) + b[-2:]
201
return b[:2] + email_utils.unquote(b[2:-2]) + b[-2:]
127
203
def read_range_definition(self):
128
204
"""Read a new range definition in a multi parts message.
130
206
Parse the headers including the empty line following them so that we
131
207
are ready to read the data itself.
133
self._headers = httplib.HTTPMessage(self._file, seekable=0)
209
self._headers = http_client.HTTPMessage(self._file, seekable=0)
134
210
# Extract the range definition
135
211
content_range = self._headers.getheader('content-range', None)
136
212
if content_range is None:
221
297
% (size, self._start, self._size))
223
299
# read data from file
226
302
if self._size > 0:
227
303
# Don't read past the range definition
228
304
limited = self._start + self._size - self._pos
230
306
limited = min(limited, size)
231
osutils.pumpfile(self._file, buffer, limited, self._max_read_size)
232
data = buffer.getvalue()
307
osutils.pumpfile(self._file, buf, limited, self._max_read_size)
308
data = buf.getvalue()
234
310
# Update _pos respecting the data effectively read
235
311
self._pos += len(data)
291
367
:return: A file-like object that can seek()+read() the
292
368
ranges indicated by the headers.
294
rfile = RangeFile(url, data)
297
size = msg.getheader('content-length', None)
302
rfile.set_range(0, size)
372
rfile = ResponseFile(url, data)
303
373
elif code == 206:
374
rfile = RangeFile(url, data)
304
375
content_type = msg.getheader('content-type', None)
305
376
if content_type is None:
306
377
# When there is no content-type header we treat the response as