/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests for HTTP response parsing.

The handle_response method reads the response body of a GET request and
returns the corresponding RangeFile.

There are four different kinds of RangeFile:
- a whole file whose size is unknown, seen as a simple byte stream,
- a whole file whose size is known, we can't read past its end,
- a single range file, a part of a file with a start and a size,
- a multiple range file, several consecutive parts with known start offset
  and size.

Some properties are common to all kinds:
- seek can only be forward (it's really a socket underneath),
- read can't cross ranges,
- successive ranges are taken into account transparently,

- the expected pattern of use is either seek(offset)+read(size) or a single
  read with no size specified. For multiple range files, multiple read() will
  return the corresponding ranges, trying to read further will raise
  InvalidHttpResponse.
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
42
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
43
from bzrlib import (
44
    errors,
45
    tests,
46
    )
3104.3.4 by Vincent Ladeuil
Add test.
47
from bzrlib.transport.http import (
48
    response,
49
    _urllib2_wrappers,
50
    )
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
51
from bzrlib.tests.file_utils import (
52
    FakeReadFile,
53
    )
3104.3.4 by Vincent Ladeuil
Add test.
54
55
56
class ReadSocket(object):
    """A socket-like object that can be given a predefined content.

    Only ``makefile`` is provided; the same in-memory file is returned on
    every call, so successive readers share a single read position.
    """

    def __init__(self, data):
        """Wrap ``data`` in an in-memory file.

        :param data: the content that the file returned by makefile() serves.
        """
        self.readfile = StringIO(data)

    def makefile(self, mode='r', bufsize=None):
        """Return the in-memory file.

        :param mode: accepted for signature compatibility, ignored.
        :param bufsize: accepted for signature compatibility, ignored.
        :return: the StringIO created in __init__.
        """
        return self.readfile
64
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
65
3104.3.4 by Vincent Ladeuil
Add test.
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """An HTTPConnection whose socket is supplied by the test.

    Writes are discarded (see send), so only what the supplied socket-like
    object provides is ever read back.
    """

    def __init__(self, sock):
        """Build a connection to 'localhost' that uses ``sock``.

        :param sock: a socket-like object (e.g. a ReadSocket).
        """
        _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
        # Set the socket to bypass the connection
        self.sock = sock

    def send(self, str):
        """Ignores the writes on the socket."""
        # NOTE(review): 'str' shadows the builtin; kept as is since it is the
        # parameter name of the method being overridden.
        pass
76
77
78
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTPConnection wrapper, using a canned socket."""

    def test_cleanup_pipe(self):
        """Test that cleanup_pipe logs a warning when enough bytes are pending.

        The canned body is 18 bytes long; 11 are read by the test, leaving 7
        unread bytes when cleanup_pipe is called.
        """
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response
        self.assertEqual('0123456789\n', resp.read(11))
        # Override the threshold to force the warning emission (the attribute
        # name below is spelled as the connection class spells it).
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self._get_log(keep_log_file=True),
                              'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
101
102
103
class TestRangeFileMixin(object):
    """Tests for accessing the first range in a RangeFile.

    Classes using this mixin must set, in their setUp:

    - self._file: the RangeFile under test, whose first range contains
      self.alpha,
    - self.first_range_start: the offset at which that first range starts.
    """

    # A simple string used to represent a file part (also called a range), in
    # which offsets are easy to calculate for test writers. It's used as a
    # building block with slight variations but basically 'a' is the first char
    # of the range and 'z' is the last.
    alpha = 'abcdefghijklmnopqrstuvwxyz'

    def test_can_read_at_first_access(self):
        """Test that the just created file can be read."""
        self.assertEqual(self.alpha, self._file.read())

    def test_seek_read(self):
        """Test seek/read inside the range."""
        f = self._file
        start = self.first_range_start
        # Before any use, tell() should be at the range start
        self.assertEqual(start, f.tell())
        cur = start # For an overall offset assertion
        f.seek(start + 3)
        cur += 3
        self.assertEqual('def', f.read(3))
        cur += len('def')
        f.seek(4, 1)
        cur += 4
        self.assertEqual('klmn', f.read(4))
        cur += len('klmn')
        # read(0) in the middle of a range
        self.assertEqual('', f.read(0))
        # seek in place
        here = f.tell()
        f.seek(0, 1)
        self.assertEqual(here, f.tell())
        self.assertEqual(cur, f.tell())

    def test_read_zero(self):
        """Test that read(0) returns '' both before and after a seek."""
        f = self._file
        self.assertEqual('', f.read(0))
        f.seek(10, 1)
        self.assertEqual('', f.read(0))

    def test_seek_at_range_end(self):
        """Test that seeking forward by the whole range length is accepted."""
        f = self._file
        # 26 is len(self.alpha)
        f.seek(26, 1)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertRaises(errors.InvalidRange, f.read, 1)

    def test_unbounded_read_after_seek(self):
        """Test that a read without size stops at the end of the range."""
        f = self._file
        f.seek(24, 1)
        # Should not cross ranges
        self.assertEqual('yz', f.read())

    def test_seek_backwards(self):
        """Test that seeking before the current position is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        f.read(12)
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_seek_outside_single_range(self):
        """Test that seeking past the end of a fully defined range fails."""
        f = self._file
        if f._size == -1 or f._boundary is not None:
            raise tests.TestNotApplicable('Needs a fully defined range')
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange,
                          f.seek, self.first_range_start + 27)

    def test_read_past_end_of_range(self):
        """Test that reading more bytes than the range still holds fails."""
        f = self._file
        if f._size == -1:
            raise tests.TestNotApplicable("Can't check an unknown size")
        start = self.first_range_start
        f.seek(start + 20)
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def test_seek_from_end(self):
        """Test seeking from the end of the file.

        The semantic is unclear in case of multiple ranges. Seeking from end
        exists only for the http transports, cannot be used if the file size is
        unknown and is not used in bzrlib itself. This test must be (and is)
        overridden by daughter classes.

        Reading from end makes sense only when a range has been requested from
        the end of the file (see HttpTransportBase._get() when using the
        'tail_amount' parameter). The HTTP response can only be a whole file or
        a single range.
        """
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
202
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
203
204
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        """Create a RangeFile over alpha without declaring any range."""
        super(TestRangeFileSizeUnknown, self).setUp()
        # The first argument is only a label for the file; it used to read
        # 'Whole_file_size_known', copied from the size-known test case.
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end.

        Unlike the other kinds of RangeFile, reading past the end returns an
        empty string instead of raising.
        """
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertEqual('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
227
228
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is known."""

    def setUp(self):
        """Create a RangeFile over alpha with a range covering all of it."""
        super(TestRangeFileSizeKnown, self).setUp()
        self._file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        self._file.set_range(0, len(self.alpha))
        self.first_range_start = 0 # It's the whole file
237
238
239
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a single range."""

    def setUp(self):
        """Create a RangeFile whose single range (alpha) starts at offset 15."""
        super(TestRangeFileSingleRange, self).setUp()
        self._file = response.RangeFile('Single_range_file',
                                        StringIO(self.alpha))
        self.first_range_start = 15
        self._file.set_range(self.first_range_start, len(self.alpha))

    def test_read_before_range(self):
        """Test that reading from a position before the range start fails."""
        # This can't occur under normal circumstances, we have to force it
        f = self._file
        f._pos = 0 # Force an invalid pos
        self.assertRaises(errors.InvalidRange, f.read, 2)
255
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
256
class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    def setUp(self):
        """Build a multipart body holding the three ranges and wrap it."""
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = 'separation'

        content = ''
        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        for (start, part) in [(self.first_range_start, self.alpha),
                              # Two contiguous ranges
                              (100, self.alpha),
                              (126, self.alpha.upper())]:
            content += self._multipart_byterange(part, start, boundary,
                                                 file_size)
        # Final boundary
        content += self._boundary_line(boundary)

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(boundary)

    def _boundary_line(self, boundary):
        """Helper to build the formatted boundary line."""
        return '--' + boundary + '\r\n'

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # Each range begins with a boundary line
        part = self._boundary_line(boundary)
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers')
        # file_size is formatted with %s, not %d, so that the documented '*'
        # default works as well as an integer size.
        part += 'Content-Range: bytes %d-%d/%s\r\n' % (offset,
                                                       offset+len(data)-1,
                                                       file_size)
        part += '\r\n'
        # Finally the raw bytes
        part += data
        return part

    def test_read_all_ranges(self):
        """Test reading the three ranges one after the other."""
        f = self._file
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))
        f.seek(10, 1)
        self.assertEqual('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just document the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to an offset lying between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly to the third range."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        """Test skipping ranges when the discard buffer is very small."""
        f = self._file
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between the same two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        # Each seek targets the last byte ('z' or 'Z') of a range
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test that reading past the last range raises InvalidHttpResponse."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
396
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
397
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
398
class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        # 14 is not a valid whence value
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            """Check that header_value yields the expected (start, size)."""
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            """Check that header_value is rejected with InvalidHttpRange."""
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
437
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
438
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
439
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
440
_full_text_response = (200, """HTTP/1.1 200 OK\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
441
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
442
Server: Apache/2.0.54 (Fedora)\r
443
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
444
ETag: "56691-23-38e9ae00"\r
445
Accept-Ranges: bytes\r
446
Content-Length: 35\r
447
Connection: close\r
448
Content-Type: text/plain; charset=UTF-8\r
449
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
450
""", """Bazaar-NG meta directory, format 1
451
""")
452
453
1786.1.26 by John Arbash Meinel
Update and test handle_response.
454
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
455
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
456
Server: Apache/2.0.54 (Fedora)\r
457
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
458
ETag: "238a3c-16ec2-805c5540"\r
459
Accept-Ranges: bytes\r
460
Content-Length: 100\r
1786.1.26 by John Arbash Meinel
Update and test handle_response.
461
Content-Range: bytes 100-199/93890\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
462
Connection: close\r
463
Content-Type: text/plain; charset=UTF-8\r
464
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
465
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
1786.1.26 by John Arbash Meinel
Update and test handle_response.
466
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
467
468
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
469
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
470
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
471
Server: Apache/2.0.54 (Fedora)\r
472
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
473
ETag: "238a3c-16ec2-805c5540"\r
474
Accept-Ranges: bytes\r
475
Content-Length: 100\r
476
Content-Range: bytes 100-199/93890\r
477
Connection: close\r
478
\r
479
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
480
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
481
482
1786.1.26 by John Arbash Meinel
Update and test handle_response.
483
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
484
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
485
Server: Apache/2.0.54 (Fedora)\r
486
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
487
ETag: "238a3c-16ec2-805c5540"\r
488
Accept-Ranges: bytes\r
489
Content-Length: 1534\r
490
Connection: close\r
491
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
492
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
493
\r""", """--418470f848b63279b\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
494
Content-type: text/plain; charset=UTF-8\r
495
Content-range: bytes 0-254/93890\r
496
\r
497
mbp@sourcefrog.net-20050309040815-13242001617e4a06
498
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
499
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
500
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
501
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
502
\r
503
--418470f848b63279b\r
504
Content-type: text/plain; charset=UTF-8\r
505
Content-range: bytes 1000-2049/93890\r
506
\r
507
40-fd4ec249b6b139ab
508
mbp@sourcefrog.net-20050311063625-07858525021f270b
509
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
510
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
511
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
512
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
513
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
514
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
515
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
516
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
517
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
518
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
519
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
520
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
521
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
522
mbp@sourcefrog.net-20050313120651-497bd231b19df600
523
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
524
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
525
mbp@sourcefrog.net-20050314025539-637a636692c055cf
526
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
527
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
528
mbp@source\r
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
529
--418470f848b63279b--\r
530
""")
531
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
532
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
533
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
534
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
535
Server: Apache/2.2.2 (Unix) DAV/2\r
536
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
537
Accept-Ranges: bytes\r
538
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
539
Content-Length: 598\r
540
X-Cache: MISS from localhost.localdomain\r
541
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
542
Proxy-Connection: keep-alive\r
543
\r
544
""",
545
"""\r
546
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
547
Content-Type: text/plain\r
548
Content-Range: bytes 0-99/18672\r
549
\r
550
# bzr knit index 8
551
552
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
553
scott@netsp\r
554
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
555
Content-Type: text/plain\r
556
Content-Range: bytes 300-499/18672\r
557
\r
558
com-20050708231537-2b124b835395399a :
559
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
560
scott@netsplit.com-20050821213706-c86\r
561
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
562
""")
563
564
1786.1.26 by John Arbash Meinel
Update and test handle_response.
565
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
566
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
567
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
568
Server: Apache/2.0.54 (Fedora)\r
569
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
570
ETag: "56691-23-38e9ae00"\r
571
Accept-Ranges: bytes\r
572
Content-Length: 35\r
573
Connection: close\r
574
\r
575
""", """Bazaar-NG meta directory, format 1
576
""")
577
578
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
579
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
580
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
581
Server: Apache/2.0.54 (Fedora)\r
582
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
583
ETag: "56691-23-38e9ae00"\r
584
Accept-Ranges: bytes\r
585
Connection: close\r
586
Content-Type: text/plain; charset=UTF-8\r
587
\r
588
""", """Bazaar-NG meta directory, format 1
589
""")
590
591
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
592
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
593
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
594
Server: Apache/2.0.54 (Fedora)\r
595
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
596
ETag: "238a3c-16ec2-805c5540"\r
597
Accept-Ranges: bytes\r
598
Content-Length: 100\r
599
Connection: close\r
600
\r
601
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
602
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
603
604
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
605
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
606
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
607
Server: Apache/2.0.54 (Fedora)\r
608
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
609
ETag: "238a3c-16ec2-805c5540"\r
610
Accept-Ranges: bytes\r
611
Content-Length: 100\r
612
Content-Range: bytes 100-199/93890\r
613
Connection: close\r
614
Content-Type: text/plain; charset=UTF-8\r
615
\r
616
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
617
618
1786.1.26 by John Arbash Meinel
Update and test handle_response.
619
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
620
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
621
Connection: close\r
622
Content-Type: text/html; charset=iso-8859-1\r
623
\r
624
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
625
<html><head>
626
<title>404 Not Found</title>
627
</head><body>
628
<h1>Not Found</h1>
629
<p>I don't know what I'm doing</p>
630
<hr>
631
</body></html>
632
""")
633
634
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
635
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
636
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
637
Content-Length: 598\r
638
\r
639
""",
640
"""\r
641
--THIS_SEPARATES\r
642
Content-Type: text/plain\r
643
\r
644
# bzr knit index 8
645
--THIS_SEPARATES\r
646
""")
647
648
649
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
650
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
651
Content-Length: 598\r
652
\r
653
""",
654
"""\r
655
--THIS_SEPARATES\r
656
Content-Type: text/plain\r
657
Content-Range: bytes 0-18/18672\r
658
\r
659
# bzr knit index 8
660
661
The range ended at the line above, this text is garbage instead of a boundary
662
line
663
""")
664
665
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
666
class TestHandleResponse(tests.TestCase):
    """Exercise response.handle_response with canned HTTP responses.

    Every fixture used here is a (status code, raw status line + headers,
    body) tuple.
    """

    def _build_HTTPMessage(self, raw_headers):
        """Build an httplib.HTTPMessage from a raw header block.

        :param raw_headers: The status line followed by the header lines.
        :return: The HTTPMessage built from what follows the status line.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: Whatever response.handle_response returns for it.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # The whole body must be readable back unchanged
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # Content-Range declares 100 bytes but the body holds only 50, so
        # skipping 51 bytes forward has to run out of data.
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_full_text_no_content_length(self):
        out = self.get_response(_full_text_response_no_content_length)
        self.assertEqual(_full_text_response_no_content_length[2], out.read())

    def test_missing_content_range(self):
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_content_range(self):
        code, raw_headers, body = _multipart_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
760
761
762
class TestRangeFileSizeReadLimited(tests.TestCase):
    """Test RangeFile _max_read_size functionality which limits the size of
    read blocks to prevent MemoryError messages in socket.recv.
    """

    def setUp(self):
        """Prepare a data block three times as large as _max_read_size."""
        tests.TestCase.setUp(self)
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        # Floor division keeps the repeat count an integer even when true
        # division is in effect.
        self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
        self.test_data_len = len(self.test_data)

    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            # Expected size first, received size second, matching the message
            self.fail(message % (self.test_data_len, len(response_data)))
794