/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5609.52.1 by Martin Pool
Cope with buggy squids interrupting the response before a mime multipart boundary
1
# Copyright (C) 2006-2011 Canonical Ltd
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
16
17
"""Handlers for HTTP Responses.
18
19
The purpose of these classes is to provide a uniform interface for clients
20
to standard HTTP responses, single range responses and multipart range
21
responses.
22
"""
23
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
24
from __future__ import absolute_import
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
25
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
26
import os
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
27
import httplib
3535.1.2 by Adrian Wilkins
Fix ability to use IIS as a dumb HTTP server.
28
import rfc822
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
29
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
30
from ... import (
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
31
    errors,
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
32
    osutils,
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
33
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
34
from ...sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
35
    BytesIO,
36
    )
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
37
38
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
39
class ResponseFile(object):
    """A wrapper around the http socket containing the result of a GET request.

    Only read(), readline(), iteration, tell() and forward seek() are
    supported: the underlying file is a socket-like object, so seeking is
    simulated by reading and discarding bytes.
    """

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        self._path = path
        self._file = infile
        # Number of bytes consumed so far; this is what tell() reports.
        self._pos = 0

    def close(self):
        """Close this file.

        Dummy implementation for consistency with the 'file' API.
        """

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        :param size:  The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :return: The data read, which may be shorter than requested.
        """
        data = self._file.read(size)
        # Track what was effectively read, not what was requested.
        self._pos += len(data)
        return data

    def readline(self):
        """Read one line from the underlying file and advance the position.

        :return: The line read (empty at EOF).
        """
        data = self._file.readline()
        self._pos += len(data)
        return data

    def __iter__(self):
        """Iterate over the remaining lines until readline() returns nothing."""
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def tell(self):
        """Return the number of bytes consumed so far."""
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward in the file by reading and discarding bytes.

        :param offset: Absolute position (os.SEEK_SET) or number of bytes to
            skip from the current position (os.SEEK_CUR).
        :param whence: os.SEEK_SET (default) or os.SEEK_CUR.
        :raises AssertionError: If the seek would go backwards or if whence
            is neither os.SEEK_SET nor os.SEEK_CUR.
        """
        if whence == os.SEEK_SET:
            if offset < self._pos:
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset - self._pos
        elif whence == os.SEEK_CUR:
            if offset < 0:
                # A negative size handed to read() means 'read to EOF', which
                # would silently swallow the rest of the body instead of
                # rejecting the backward seek.
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, self._pos + offset))
            to_discard = offset
        else:
            # The total size is unknown, so seeking relative to the end (or
            # with any other whence value) cannot be honoured.
            raise AssertionError(
                "Unsupported whence %s, can only seek forward from the "
                "start or the current position" % (whence,))
        if to_discard:
            # Just discard the unwanted bytes
            self.read(to_discard)
101
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
102
# A RangeFile expects the following grammar (simplified to outline the
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
103
# assumptions we rely upon).
104
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
105
# file: single_range
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
106
#     | multiple_range
107
108
# single_range: content_range_header data
109
110
# multiple_range: boundary_header boundary (content_range_header data boundary)+
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
111
6450.2.1 by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport
112
class RangeFile(ResponseFile):
    """File-like object giving access to the ranges of a partial response.

    The data is acquired while the http response is being received, so all
    accesses must be sequential: sockets can't be seeked, a seek is simulated
    by reading and discarding the data.

    The ranges are discovered as reading progresses and only one range is
    available at any given time, so callers must use monotonically increasing
    offsets.
    """

    # _checked_read() may have to discard several MB in the worst case. To
    # avoid buffering that much, data is read and thrown away chunk by chunk.
    # The underlying file is either a socket or a BytesIO, so 8k chunks should
    # be fine.
    _discarded_buf_size = 8192

    # maximum size of read requests -- used to avoid MemoryError issues in recv
    _max_read_size = 512 * 1024

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        super(RangeFile, self).__init__(path, infile)
        self._boundary = None
        # For multi parts responses, holds the headers of the range currently
        # being read.
        self._headers = None
        # Until told otherwise: the whole file, of unspecified size
        self.set_range(0, -1)

    def set_range(self, start, size):
        """Change the range mapping.

        :param start: Offset of the first byte of the range.
        :param size: Number of bytes in the range (the constructor uses -1
            for 'unspecified').
        """
        self._start, self._size = start, size
        # The exposed position restarts at the beginning of the new range
        self._pos = start

    def set_boundary(self, boundary):
        """Define the boundary used in a multi parts message.

        The file should be at the beginning of the body, the first range
        definition is read and taken into account.
        """
        self._boundary = boundary
        # Consume the first boundary line and the headers that follow it so
        # that the first range is set up.
        self.read_boundary()
        self.read_range_definition()

    def read_boundary(self):
        """Read the boundary headers defining a new range.

        :raises errors.HttpBoundaryMissing: If the response ends before any
            boundary line is found.
        :raises errors.InvalidHttpResponse: If the line read is not the
            expected boundary line.
        """
        crlf = '\r\n'
        line = crlf
        # RFC2616 19.2 Additional CRLFs may precede the first boundary string
        # entity. To be on the safe side they are accepted before any
        # boundary line.
        while line == crlf:
            line = self._file.readline()

        if line == '':
            # A timeout in the proxy server caused the response to end early.
            # See launchpad bug 198646.
            raise errors.HttpBoundaryMissing(self._path, self._boundary)

        expected = '--' + self._boundary + '\r\n'
        # rfc822.unquote() incorrectly unquotes strings enclosed in <> and
        # IIS 6 and 7 incorrectly wrap boundary strings in <>; together they
        # make a beautiful bug, which is tolerated here by retrying the
        # comparison on the unquoted line.
        if line != expected and self._unquote_boundary(line) != expected:
            raise errors.InvalidHttpResponse(
                self._path,
                "Expected a boundary (%s) line, got '%s'"
                % (self._boundary, line))

    def _unquote_boundary(self, b):
        """Unquote the middle of b, keeping its first and last two chars."""
        prefix, quoted, suffix = b[:2], b[2:-2], b[-2:]
        return prefix + rfc822.unquote(quoted) + suffix

    def read_range_definition(self):
        """Read a new range definition in a multi parts message.

        The headers, including the empty line following them, are parsed so
        that the file is left ready for reading the data itself.

        :raises errors.InvalidHttpResponse: If the part has no Content-Range
            header.
        """
        self._headers = httplib.HTTPMessage(self._file, seekable=0)
        # The range definition lives in the Content-Range header
        range_header = self._headers.getheader('content-range', None)
        if range_header is None:
            raise errors.InvalidHttpResponse(
                self._path,
                'Content-Range header missing in a multi-part response')
        self.set_range_from_header(range_header)

    def set_range_from_header(self, content_range):
        """Helper to set the new range from its description in the headers.

        :param content_range: Value of a Content-Range header, expected to
            look like 'bytes <start>-<end>/<total>'.
        :raises errors.InvalidHttpRange: If the value can't be parsed, uses a
            unit other than 'bytes' or describes an empty or negative range.
        """
        try:
            unit, spec = content_range.split()
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Malformed header')
        if unit != 'bytes':
            raise errors.InvalidHttpRange(self._path, content_range,
                                          "Unsupported range type '%s'" % unit)
        try:
            # The total is not needed; note that it may be either the file
            # size or '*' if the server can't or doesn't want to return it.
            bounds, _total = spec.split('/')
            first, last = bounds.split('-')
            first = int(first)
            last = int(last)
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range values')
        length = last - first + 1
        if length <= 0:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range, size <= 0')
        self.set_range(first, length)

    def _checked_read(self, size):
        """Read the file checking for short reads.

        The data read is discarded along the way.

        :raises errors.ShortReadvError: If the file is exhausted before size
            bytes have been read.
        """
        initial_pos = self._pos
        left = size
        while left > 0:
            chunk = self._file.read(min(left, self._discarded_buf_size))
            if not chunk:
                raise errors.ShortReadvError(self._path, initial_pos, size,
                                             size - left)
            left -= len(chunk)
        self._pos += size

    def _seek_to_next_range(self):
        """Move to the next range of a multi parts response.

        :raises errors.InvalidRange: If no boundary is defined, in which case
            there is no other range to go to.
        """
        if self._boundary is None:
            # Without a boundary there is no way to find another range
            raise errors.InvalidRange(self._path, self._pos,
                                      "Range (%s, %s) exhausted"
                                      % (self._start, self._size))
        self.read_boundary()
        self.read_range_definition()

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        Reading across ranges is not supported. We rely on the underlying http
        client to clean the socket if we leave bytes unread. This may occur for
        the final boundary line of a multipart response or for any range
        request not entirely consumed by the client (due to offset coalescing)

        :param size:  The number of bytes to read.  Leave unspecified or pass
            -1 to read to EOF.
        :raises errors.InvalidRange: If the read starts before the current
            range, would cross its end, or no further range is available.
        """
        if self._size > 0 and self._pos == self._start + self._size:
            # The current range is exhausted
            if size == 0:
                return ''
            self._seek_to_next_range()
        elif self._pos < self._start:
            raise errors.InvalidRange(
                self._path, self._pos,
                "Can't read %s bytes before range (%s, %s)"
                % (size, self._start, self._size))

        # _size is looked up again here: moving to the next range changed it
        if self._size > 0:
            range_end = self._start + self._size
            if size > 0 and self._pos + size > range_end:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "Can't read %s bytes across range (%s, %s)"
                    % (size, self._start, self._size))
            # Don't read past the range definition
            to_read = range_end - self._pos
            if size >= 0:
                to_read = min(to_read, size)
        else:
            to_read = size

        # read data from file, in requests of at most _max_read_size bytes
        sink = BytesIO()
        osutils.pumpfile(self._file, sink, to_read, self._max_read_size)
        data = sink.getvalue()

        # Update _pos respecting the data effectively read
        self._pos += len(data)
        return data

    def seek(self, offset, whence=0):
        """Seek forward, crossing range boundaries when needed.

        :param offset: Position, interpreted according to whence.
        :param whence: 0 for an absolute position, 1 for relative to the
            current position, 2 for relative to the end of the current range.
        :raises ValueError: If whence has any other value.
        :raises errors.InvalidRange: If the seek goes backwards, or is
            relative to the end while the size is unknown.
        """
        origin = self._pos
        if whence == 0:
            target = offset
        elif whence == 1:
            target = origin + offset
        elif whence == 2:
            if self._size <= 0:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "RangeFile: can't seek from end while size is unknown")
            target = self._start + self._size + offset # offset < 0
        else:
            raise ValueError("Invalid value %s for whence." % whence)

        if target < self._pos:
            # Can't seek backwards
            raise errors.InvalidRange(
                self._path, self._pos,
                'RangeFile: trying to seek backwards to %s' % target)

        if self._size > 0:
            range_end = self._start + self._size
            while target > range_end:
                # The target lies beyond the current range: finish reading
                # it, then move on to the next one.
                unread = range_end - self._pos
                if unread > 0:
                    self._checked_read(unread)
                self._seek_to_next_range()
                range_end = self._start + self._size

        gap = target - self._pos
        if gap > 0: # gap can be < 0 if we crossed a range boundary
            # The data is not needed, just read it and throw it away
            self._checked_read(gap)

    def tell(self):
        """Return the current position in the file."""
        return self._pos
348
1786.1.5 by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object.
349
3945.1.8 by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking.
350
def handle_response(url, code, msg, data):
    """Interpret the code & headers and wrap the provided data in a RangeFile.

    This is a factory method which returns an appropriate RangeFile based on
    the code & headers it's given.

    :param url: The url being processed. Mostly for error reporting
    :param code: The integer HTTP response code
    :param msg: An HTTPMessage containing the headers for the response
    :param data: A file-like object that can be read() to get the
                 requested data
    :return: A file-like object that can seek()+read() the
             ranges indicated by the headers.
    :raises errors.InvalidHttpResponse: If the code is neither 200 nor 206,
        if a multipart response declares no boundary, or if a single range
        206 response has no Content-Range header.
    """
    if code == 200:
        # A whole file
        rfile = ResponseFile(url, data)
    elif code == 206:
        rfile = RangeFile(url, data)
        if msg.getheader('content-type', None) is None:
            # When there is no content-type header we treat the response as
            # being of type 'application/octet-stream' as per RFC2616 section
            # 7.2.1.
            # Therefore it is obviously not multipart
            is_multipart = False
        else:
            is_multipart = (msg.getmaintype() == 'multipart'
                            and msg.getsubtype() == 'byteranges')

        if is_multipart:
            # Full fledged multipart response
            boundary = msg.getparam('boundary')
            if boundary is None:
                # Without a boundary the parts can't be delimited; report a
                # proper transport error rather than failing later while
                # building the expected boundary line from None.
                raise errors.InvalidHttpResponse(url,
                    'Missing the boundary parameter in a multipart/byteranges'
                    ' response')
            rfile.set_boundary(boundary)
        else:
            # A response to a range request, but not multipart
            content_range = msg.getheader('content-range', None)
            if content_range is None:
                raise errors.InvalidHttpResponse(url,
                    'Missing the Content-Range header in a 206 range response')
            rfile.set_range_from_header(content_range)
    else:
        raise errors.InvalidHttpResponse(url,
                                         'Unknown response code %s' % code)

    return rfile
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
396