/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests for HTTP response parsing.
18
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
21
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
27
  and size.
28
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
33
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
34
- the expected pattern of use is either seek(offset)+read(size) or a single
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
35
  read with no size specified. For multiple range files, multiple read() will
36
  return the corresponding ranges, trying to read further will raise
37
  InvalidHttpResponse.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
38
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
3535.1.1 by Adrian Wilkins
Made the behaviour of the existing multi-range test more like the real thing by
42
import rfc822
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
43
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
44
from bzrlib import (
45
    errors,
46
    tests,
47
    )
3104.3.4 by Vincent Ladeuil
Add test.
48
from bzrlib.transport.http import (
49
    response,
50
    _urllib2_wrappers,
51
    )
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
52
from bzrlib.tests.file_utils import (
53
    FakeReadFile,
54
    )
3104.3.4 by Vincent Ladeuil
Add test.
55
56
57
class ReadSocket(object):
    """Socket stand-in serving a canned payload through makefile()."""

    def __init__(self, data):
        # The payload is wrapped once; every makefile() call shares it.
        self.readfile = StringIO(data)

    def makefile(self, mode='r', bufsize=None):
        """Return the shared file-like object.

        Both mode and bufsize are accepted for socket API compatibility and
        otherwise ignored.
        """
        return self.readfile
65
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
66
3104.3.4 by Vincent Ladeuil
Add test.
67
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """A connection using a caller-provided socket instead of connecting."""

    def __init__(self, sock):
        _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
        # Installing the socket up front bypasses the connection step
        self.sock = sock

    def send(self, str):
        """Discard whatever is written to the socket."""
77
78
79
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTP connection behaviour around unread responses."""

    def test_cleanup_pipe(self):
        """cleanup_pipe() leaves a trace in the log above the threshold."""
        canned = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        connection = FakeHTTPConnection(canned)
        # Go through the motions of issuing a request, otherwise the
        # connection will not let us read a response.
        connection.putrequest('GET', 'http://localhost/fictious')
        connection.endheaders()
        reply = connection.getresponse()
        # Consume only the first 11 bytes of the 18 bytes body
        self.assertEqual('0123456789\n', reply.read(11))
        # 7 bytes are still pending: set the threshold just below that to
        # force the warning emission
        connection._range_warning_thresold = 6
        connection.cleanup_pipe()
        log = self._get_log(keep_log_file=True)
        self.assertContainsRe(log, 'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
102
103
104
class TestRangeFileMixin(object):
    """Tests for accessing the first range in a RangeFile.

    The tests below rely on two attributes that the classes using this mixin
    set in their setUp():

    - self._file: the RangeFile under test, whose first range contains
      ``alpha``,
    - self.first_range_start: the offset of that first range in the file.
    """

    # A simple string used to represent a file part (also called a range), in
    # which offsets are easy to calculate for test writers. It's used as a
    # building block with slight variations but basically 'a' is the first char
    # of the range and 'z' is the last.
    alpha = 'abcdefghijklmnopqrstuvwxyz'

    def test_can_read_at_first_access(self):
        """Test that the just created file can be read."""
        self.assertEqual(self.alpha, self._file.read())

    def test_seek_read(self):
        """Test seek/read inside the range."""
        f = self._file
        start = self.first_range_start
        # Before any use, tell() should be at the range start
        self.assertEqual(start, f.tell())
        cur = start # For an overall offset assertion
        f.seek(start + 3)
        cur += 3
        self.assertEqual('def', f.read(3))
        cur += len('def')
        f.seek(4, 1)
        cur += 4
        self.assertEqual('klmn', f.read(4))
        cur += len('klmn')
        # read(0) in the middle of a range
        self.assertEqual('', f.read(0))
        # seek in place
        here = f.tell()
        f.seek(0, 1)
        self.assertEqual(here, f.tell())
        self.assertEqual(cur, f.tell())

    def test_read_zero(self):
        """Test that read(0) returns an empty string, even after a seek."""
        f = self._file
        self.assertEqual('', f.read(0))
        f.seek(10, 1)
        self.assertEqual('', f.read(0))

    def test_seek_at_range_end(self):
        """Test that seeking forward by the whole range length is accepted."""
        f = self._file
        f.seek(len(self.alpha), 1)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertRaises(errors.InvalidRange, f.read, 1)

    def test_unbounded_read_after_seek(self):
        """Test that a read without size stops at the end of the range."""
        f = self._file
        f.seek(24, 1)
        # Should not cross ranges
        self.assertEqual('yz', f.read())

    def test_seek_backwards(self):
        """Test that seeking before the current position is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        f.read(12)
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_seek_outside_single_range(self):
        """Test that seeking past the end of a lone range is refused."""
        f = self._file
        if f._size == -1 or f._boundary is not None:
            raise tests.TestNotApplicable('Needs a fully defined range')
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange,
                          f.seek, self.first_range_start + 27)

    def test_read_past_end_of_range(self):
        """Test that reading more than what the range holds is refused."""
        f = self._file
        if f._size == -1:
            raise tests.TestNotApplicable("Can't check an unknown size")
        start = self.first_range_start
        f.seek(start + 20)
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def test_seek_from_end(self):
        """Test seeking from the end of the file.

        The semantic is unclear in case of multiple ranges. Seeking from end
        exists only for the http transports, cannot be used if the file size is
        unknown and is not used in bzrlib itself. This test must be (and is)
        overridden by daughter classes.

        Reading from end makes sense only when a range has been requested from
        the end of the file (see HttpTransportBase._get() when using the
        'tail_amount' parameter). The HTTP response can only be a whole file or
        a single range.
        """
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
203
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
204
205
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        super(TestRangeFileSizeUnknown, self).setUp()
        # The name given to the RangeFile says what this class exercises: a
        # file of *unknown* size (the sibling class covers the known size).
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end.

        Unlike the other kinds of RangeFile, reading past the end is expected
        to return an empty string rather than raising.
        """
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertEqual('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
228
229
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Exercise a RangeFile holding a whole file of known size."""

    def setUp(self):
        super(TestRangeFileSizeKnown, self).setUp()
        # The single range starts at 0 and spans the whole content
        self.first_range_start = 0
        whole_file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        whole_file.set_range(0, len(self.alpha))
        self._file = whole_file
238
239
240
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Exercise a RangeFile made of one range inside a larger file."""

    def setUp(self):
        super(TestRangeFileSingleRange, self).setUp()
        self.first_range_start = 15
        single_range = response.RangeFile('Single_range_file',
                                          StringIO(self.alpha))
        single_range.set_range(self.first_range_start, len(self.alpha))
        self._file = single_range

    def test_read_before_range(self):
        """Reading from a position located before the range is an error."""
        range_file = self._file
        # This can't occur under normal circumstances, so the invalid
        # position has to be forced.
        range_file._pos = 0
        self.assertRaises(errors.InvalidRange, range_file.read, 2)
256
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
257
class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    def _boundary(self):
        """Return the boundary string separating the ranges.

        Daughter classes override this to exercise other boundary styles.
        """
        return "separation"

    def setUp(self):
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = self._boundary()

        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        parts = [self._multipart_byterange(part, start, boundary, file_size)
                 for (start, part) in [(self.first_range_start, self.alpha),
                                       # Two contiguous ranges
                                       (100, self.alpha),
                                       (126, self.alpha.upper())]]
        # Final boundary
        parts.append(self._boundary_line(boundary))
        content = ''.join(parts)

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        #
        # Note that all parameters in real HTTPResponse instances are
        # passed through rfc822.unquote, so we should too
        self._file.set_boundary(rfc822.unquote(boundary))

    def _boundary_line(self, boundary):
        """Helper to build the formatted boundary line."""
        return '--' + boundary + '\r\n'

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers).
        #
        # file_size is formatted with %s because it is either a number or the
        # '*' default: %d would raise a TypeError for the latter.
        content_range = 'Content-Range: bytes %d-%d/%s\r\n' % (
            offset, offset + len(data) - 1, file_size)
        return ''.join([
            # Each range begins with a boundary line
            self._boundary_line(boundary),
            content_range,
            # An empty line ends the headers
            '\r\n',
            # Finally the raw bytes
            data,
            ])

    def test_read_all_ranges(self):
        """Test reading the three ranges in sequence."""
        f = self._file
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))
        f.seek(10, 1)
        self.assertEqual('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just documents the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to a position located between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly into the third range."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        """Test skipping ranges with a tiny discard buffer."""
        f = self._file
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test that reading after the last range is an invalid response."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
402
3535.1.1 by Adrian Wilkins
Made the behaviour of the existing multi-range test more like the real thing by
403
class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges):
    """Rerun TestRangeFileMultipleRanges with an angle-bracket quoted boundary.

    IIS 6.0 and 7.0 use such a boundary string. This reveals a bug caused by
    the combination of:

    - the bad implementation of RFC 822 unquoting in Python (angles are not
      quotes),
    - the bad implementation of RFC 2046 in IIS (angles are not permitted
      chars in boundary lines).
    """

    def _boundary(self):
        # IIS 6 and 7 use this value
        return "<q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl>"
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
417
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
418
class TestRangeFileVarious(tests.TestCase):
    """RangeFile checks that do not fit in the other test classes."""

    def test_seek_whence(self):
        """Exercise the accepted whence values and reject an unknown one."""
        range_file = response.RangeFile('foo', StringIO('abc'))
        range_file.set_range(0, 3)
        range_file.seek(0)
        range_file.seek(1, 1)
        range_file.seek(-1, 2)
        self.assertRaises(ValueError, range_file.seek, 0, 14)

    def test_range_syntax(self):
        """Check which Content-Range values are accepted or rejected."""
        range_file = response.RangeFile('foo', StringIO())

        # (expected (start, size), header value) pairs, checked in order
        accepted = [
            ((1, 10), 'bytes 1-10/11'),
            ((1, 10), 'bytes 1-10/*'),
            ((12, 2), '\tbytes 12-13/*'),
            ((28, 1), '  bytes 28-28/*'),
            ((2123, 2120), 'bytes  2123-4242/12310'),
            ((1, 10), 'bytes 1-10/ttt'), # We don't check total (ttt)
            ]
        for expected, header_value in accepted:
            range_file.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected,
                             (range_file.tell(), range_file._size))

        rejected = [
            'bytes 10-2/3',
            'chars 1-2/3',
            'bytes xx-yyy/zzz',
            'bytes xx-12/zzz',
            'bytes 11-yy/zzz',
            'bytes10-2/3',
            ]
        for header_value in rejected:
            self.assertRaises(errors.InvalidHttpRange,
                              range_file.set_range_from_header, header_value)
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
457
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
458
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
459
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
460
_full_text_response = (200, """HTTP/1.1 200 OK\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
461
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
462
Server: Apache/2.0.54 (Fedora)\r
463
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
464
ETag: "56691-23-38e9ae00"\r
465
Accept-Ranges: bytes\r
466
Content-Length: 35\r
467
Connection: close\r
468
Content-Type: text/plain; charset=UTF-8\r
469
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
470
""", """Bazaar-NG meta directory, format 1
471
""")
472
473
1786.1.26 by John Arbash Meinel
Update and test handle_response.
474
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
475
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
476
Server: Apache/2.0.54 (Fedora)\r
477
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
478
ETag: "238a3c-16ec2-805c5540"\r
479
Accept-Ranges: bytes\r
480
Content-Length: 100\r
1786.1.26 by John Arbash Meinel
Update and test handle_response.
481
Content-Range: bytes 100-199/93890\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
482
Connection: close\r
483
Content-Type: text/plain; charset=UTF-8\r
484
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
485
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
1786.1.26 by John Arbash Meinel
Update and test handle_response.
486
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
487
488
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
489
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
490
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
Server: Apache/2.0.54 (Fedora)\r
492
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
493
ETag: "238a3c-16ec2-805c5540"\r
494
Accept-Ranges: bytes\r
495
Content-Length: 100\r
496
Content-Range: bytes 100-199/93890\r
497
Connection: close\r
498
\r
499
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
500
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
501
502
1786.1.26 by John Arbash Meinel
Update and test handle_response.
503
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
504
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
505
Server: Apache/2.0.54 (Fedora)\r
506
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
507
ETag: "238a3c-16ec2-805c5540"\r
508
Accept-Ranges: bytes\r
509
Content-Length: 1534\r
510
Connection: close\r
511
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
512
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
513
\r""", """--418470f848b63279b\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
514
Content-type: text/plain; charset=UTF-8\r
515
Content-range: bytes 0-254/93890\r
516
\r
517
mbp@sourcefrog.net-20050309040815-13242001617e4a06
518
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
519
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
520
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
521
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
522
\r
523
--418470f848b63279b\r
524
Content-type: text/plain; charset=UTF-8\r
525
Content-range: bytes 1000-2049/93890\r
526
\r
527
40-fd4ec249b6b139ab
528
mbp@sourcefrog.net-20050311063625-07858525021f270b
529
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
530
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
531
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
532
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
533
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
534
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
535
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
536
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
537
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
538
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
539
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
540
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
541
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
542
mbp@sourcefrog.net-20050313120651-497bd231b19df600
543
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
544
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
545
mbp@sourcefrog.net-20050314025539-637a636692c055cf
546
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
547
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
548
mbp@source\r
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
549
--418470f848b63279b--\r
550
""")
551
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
552
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
553
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
554
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
555
Server: Apache/2.2.2 (Unix) DAV/2\r
556
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
557
Accept-Ranges: bytes\r
558
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
559
Content-Length: 598\r
560
X-Cache: MISS from localhost.localdomain\r
561
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
562
Proxy-Connection: keep-alive\r
563
\r
564
""",
565
"""\r
566
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
567
Content-Type: text/plain\r
568
Content-Range: bytes 0-99/18672\r
569
\r
570
# bzr knit index 8
571
572
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
573
scott@netsp\r
574
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
575
Content-Type: text/plain\r
576
Content-Range: bytes 300-499/18672\r
577
\r
578
com-20050708231537-2b124b835395399a :
579
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
580
scott@netsplit.com-20050821213706-c86\r
581
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
582
""")
583
584
1786.1.26 by John Arbash Meinel
Update and test handle_response.
585
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
586
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
587
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
588
Server: Apache/2.0.54 (Fedora)\r
589
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
590
ETag: "56691-23-38e9ae00"\r
591
Accept-Ranges: bytes\r
592
Content-Length: 35\r
593
Connection: close\r
594
\r
595
""", """Bazaar-NG meta directory, format 1
596
""")
597
598
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
599
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
600
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
601
Server: Apache/2.0.54 (Fedora)\r
602
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
603
ETag: "56691-23-38e9ae00"\r
604
Accept-Ranges: bytes\r
605
Connection: close\r
606
Content-Type: text/plain; charset=UTF-8\r
607
\r
608
""", """Bazaar-NG meta directory, format 1
609
""")
610
611
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
612
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
613
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
614
Server: Apache/2.0.54 (Fedora)\r
615
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
616
ETag: "238a3c-16ec2-805c5540"\r
617
Accept-Ranges: bytes\r
618
Content-Length: 100\r
619
Connection: close\r
620
\r
621
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
622
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
623
624
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
625
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
626
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
627
Server: Apache/2.0.54 (Fedora)\r
628
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
629
ETag: "238a3c-16ec2-805c5540"\r
630
Accept-Ranges: bytes\r
631
Content-Length: 100\r
632
Content-Range: bytes 100-199/93890\r
633
Connection: close\r
634
Content-Type: text/plain; charset=UTF-8\r
635
\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
637
638
1786.1.26 by John Arbash Meinel
Update and test handle_response.
639
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
640
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
641
Connection: close\r
642
Content-Type: text/html; charset=iso-8859-1\r
643
\r
644
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
645
<html><head>
646
<title>404 Not Found</title>
647
</head><body>
648
<h1>Not Found</h1>
649
<p>I don't know what I'm doing</p>
650
<hr>
651
</body></html>
652
""")
653
654
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
655
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
656
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
657
Content-Length: 598\r
658
\r
659
""",
660
"""\r
661
--THIS_SEPARATES\r
662
Content-Type: text/plain\r
663
\r
664
# bzr knit index 8
665
--THIS_SEPARATES\r
666
""")
667
668
669
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
670
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
671
Content-Length: 598\r
672
\r
673
""",
674
"""\r
675
--THIS_SEPARATES\r
676
Content-Type: text/plain\r
677
Content-Range: bytes 0-18/18672\r
678
\r
679
# bzr knit index 8
680
681
The range ended at the line above, this text is garbage instead of a boundary
682
line
683
""")
684
685
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
686
class TestHandleResponse(tests.TestCase):
    """Feed canned HTTP responses to response.handle_response.

    Every fixture used here is a 3-tuple: (status code, raw status line and
    headers, body).
    """

    def _build_HTTPMessage(self, raw_headers):
        """Build an httplib.HTTPMessage from a raw status line and headers.

        :param raw_headers: The status line followed by the header lines.
        :return: The HTTPMessage created from what follows the status line.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response, url='http://foo'):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :param url: The url handed to handle_response.
        :return: Whatever response.handle_response returns for that input.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response(url, code, msg, StringIO(body))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # The whole body must be readable back unchanged
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # Content-Range declares 100 bytes but only 50 are present, so
        # skipping 51 bytes forward runs past the available data
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure the seeks and reads succeed; the data read is not
        # compared against anything
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure the seeks and reads succeed; the data read is not
        # compared against anything
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_full_text_no_content_length(self):
        out = self.get_response(_full_text_response_no_content_length)
        self.assertEqual(_full_text_response_no_content_length[2], out.read())

    def test_missing_content_range(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response,
                          _single_range_no_content_range, 'http://bogus')

    def test_multipart_no_content_range(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response,
                          _multipart_no_content_range, 'http://bogus')

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
780
781
782
class TestRangeFileSizeReadLimited(tests.TestCase):
    """Test RangeFile _max_read_size functionality which limits the size of
    read blocks to prevent MemoryError messages in socket.recv.
    """

    def setUp(self):
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        # Floor division keeps the repeat count an integer even when true
        # division is in effect.
        self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
        self.test_data_len = len(self.test_data)

    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            # Expected size first, then what was actually received
            self.fail(message % (self.test_data_len, len(response_data)))
814