/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests for HTTP response parsing.
18
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
21
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
27
  and size.
28
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
33
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
34
- the expected pattern of use is either seek(offset)+read(size) or a single
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
35
  read with no size specified. For multiple range files, multiple read() will
36
  return the corresponding ranges, trying to read further will raise
37
  InvalidHttpResponse.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
38
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
42
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
43
from bzrlib import (
44
    errors,
45
    tests,
46
    )
3104.3.4 by Vincent Ladeuil
Add test.
47
from bzrlib.transport.http import (
48
    response,
49
    _urllib2_wrappers,
50
    )
51
52
53
class ReadSocket(object):
    """Socket stand-in whose makefile() serves predefined content."""

    def __init__(self, data):
        # Wrap the canned content once; makefile() hands out this same object
        self.readfile = StringIO(data)

    def makefile(self, mode='r', bufsize=None):
        """Return the shared file-like object; both arguments are ignored."""
        canned = self.readfile
        return canned
61
62
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """HTTPConnection using a caller-supplied socket and dropping writes."""

    def __init__(self, sock):
        _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
        # Installing the socket ourselves bypasses the connection step
        self.sock = sock

    def send(self, str):
        """Discard whatever is written to the socket."""
        return None
72
73
74
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTPConnection wrapper, run against a fake socket."""

    def test_cleanup_pipe(self):
        """cleanup_pipe() logs a warning when unread bytes exceed the limit."""
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response (11 of the 18 announced bytes)
        self.assertEqual('0123456789\n', resp.read(11))
        # Override the threshold to force the warning emission (the attribute
        # name is spelled '_range_warning_thresold' on the connection)
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self._get_log(keep_log_file=True),
                              'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
97
98
99
class TestRangeFileMixin(object):
100
    """Tests for accessing the first range in a RangeFile."""
101
102
    # A simple string used to represent a file part (also called a range), in
103
    # which offsets are easy to calculate for test writers. It's used as a
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
104
    # building block with slight variations but basically 'a' is the first char
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
105
    # of the range and 'z' is the last.
106
    alpha = 'abcdefghijklmnopqrstuvwxyz'
107
108
    def test_can_read_at_first_access(self):
109
        """Test that the just created file can be read."""
110
        self.assertEquals(self.alpha, self._file.read())
111
112
    def test_seek_read(self):
113
        """Test seek/read inside the range."""
114
        f = self._file
115
        start = self.first_range_start
116
        # Before any use, tell() should be at the range start
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
117
        self.assertEquals(start, f.tell())
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
118
        cur = start # For an overall offset assertion
119
        f.seek(start + 3)
120
        cur += 3
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
121
        self.assertEquals('def', f.read(3))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
122
        cur += len('def')
123
        f.seek(4, 1)
124
        cur += 4
125
        self.assertEquals('klmn', f.read(4))
126
        cur += len('klmn')
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
127
        # read(0) in the middle of a range
128
        self.assertEquals('', f.read(0))
129
        # seek in place
130
        here = f.tell()
131
        f.seek(0, 1)
132
        self.assertEquals(here, f.tell())
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
133
        self.assertEquals(cur, f.tell())
134
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
135
    def test_read_zero(self):
136
        f = self._file
137
        start = self.first_range_start
138
        self.assertEquals('', f.read(0))
139
        f.seek(10, 1)
140
        self.assertEquals('', f.read(0))
141
142
    def test_seek_at_range_end(self):
143
        f = self._file
144
        f.seek(26, 1)
145
146
    def test_read_at_range_end(self):
147
        """Test read behaviour at range end."""
148
        f = self._file
149
        self.assertEquals(self.alpha, f.read())
150
        self.assertEquals('', f.read(0))
151
        self.assertRaises(errors.InvalidRange, f.read, 1)
152
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
153
    def test_unbounded_read_after_seek(self):
154
        f = self._file
155
        f.seek(24, 1)
156
        # Should not cross ranges
157
        self.assertEquals('yz', f.read())
158
159
    def test_seek_backwards(self):
160
        f = self._file
161
        start = self.first_range_start
162
        f.seek(start)
163
        f.read(12)
164
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)
165
166
    def test_seek_outside_single_range(self):
167
        f = self._file
168
        if f._size == -1 or f._boundary is not None:
169
            raise tests.TestNotApplicable('Needs a fully defined range')
170
        # Will seek past the range and then errors out
171
        self.assertRaises(errors.InvalidRange,
172
                          f.seek, self.first_range_start + 27)
173
174
    def test_read_past_end_of_range(self):
175
        f = self._file
176
        if f._size == -1:
177
            raise tests.TestNotApplicable("Can't check an unknown size")
178
        start = self.first_range_start
179
        f.seek(start + 20)
180
        self.assertRaises(errors.InvalidRange, f.read, 10)
181
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
182
    def test_seek_from_end(self):
183
       """Test seeking from the end of the file.
184
185
       The semantic is unclear in case of multiple ranges. Seeking from end
186
       exists only for the http transports, cannot be used if the file size is
187
       unknown and is not used in bzrlib itself. This test must be (and is)
188
       overridden by daughter classes.
189
190
       Reading from end makes sense only when a range has been requested from
191
       the end of the file (see HttpTransportBase._get() when using the
192
       'tail_amount' parameter). The HTTP response can only be a whole file or
193
       a single range.
194
       """
195
       f = self._file
196
       f.seek(-2, 2)
197
       self.assertEquals('yz', f.read())
198
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
199
200
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        super(TestRangeFileSizeUnknown, self).setUp()
        # The first argument only labels the file; name it after the case
        # really under test (it used to be copied from the known-size class).
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end.

        With no known size, reading past the data returns an empty string
        instead of raising.
        """
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertEqual('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
223
224
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Run the shared RangeFile tests on a whole file of known size."""

    def setUp(self):
        super(TestRangeFileSizeKnown, self).setUp()
        # The only range is the whole file, so it starts at offset 0
        self.first_range_start = 0
        whole_file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        whole_file.set_range(0, len(self.alpha))
        self._file = whole_file
233
234
235
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Run the shared RangeFile tests on one range starting at offset 15."""

    def setUp(self):
        super(TestRangeFileSingleRange, self).setUp()
        single = response.RangeFile('Single_range_file',
                                    StringIO(self.alpha))
        self._file = single
        self.first_range_start = 15
        single.set_range(self.first_range_start, len(self.alpha))

    def test_read_before_range(self):
        """Reading from a position before the range start is refused."""
        range_file = self._file
        # Such a position can't be reached through the normal API, so the
        # private attribute is overwritten to force an invalid pos
        range_file._pos = 0
        self.assertRaises(errors.InvalidRange, range_file.read, 2)
251
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
252
class TestRangeFilMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    def setUp(self):
        super(TestRangeFilMultipleRanges, self).setUp()

        boundary = 'separation'

        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        ranges = [(self.first_range_start, self.alpha),
                  # Two contiguous ranges
                  (100, self.alpha),
                  (126, self.alpha.upper())]
        parts = [self._multipart_byterange(part, start, boundary, file_size)
                 for (start, part) in ranges]
        # Final boundary
        parts.append(self._boundary_line(boundary))
        content = ''.join(parts)

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(boundary)

    def _boundary_line(self, boundary):
        """Helper to build the formatted boundary line."""
        return '--' + boundary + '\r\n'

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # file_size is formatted with %s: the default '*' is a string and
        # would raise a TypeError with %d, while integers render the same.
        content_range = 'Content-Range: bytes %d-%d/%s\r\n' % (
            offset, offset + len(data) - 1, file_size)
        # Each range begins with a boundary line, followed by a set of headers
        # (only 'Content-Range' is required for our implementation,
        # TestHandleResponse below will exercise ranges with multiple or
        # missing headers), an empty line and finally the raw bytes.
        return ''.join([self._boundary_line(boundary),
                        content_range,
                        '\r\n',
                        data])

    def test_read_all_ranges(self):
        """Test reading the three ranges one after the other."""
        f = self._file
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))
        f.seek(10, 1)
        self.assertEqual('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just document the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to an offset that lies between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly to the third range, skipping the others."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between the same two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test that reading after the last range is an invalid response."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
384
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
385
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
386
class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        # 0, 1 and 2 are accepted, anything else is a ValueError
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            """Check header_value is accepted and gives (start, size)."""
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            """Check header_value is rejected as an invalid range."""
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
425
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
426
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
427
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
428
_full_text_response = (200, """HTTP/1.1 200 OK\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
429
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
430
Server: Apache/2.0.54 (Fedora)\r
431
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
432
ETag: "56691-23-38e9ae00"\r
433
Accept-Ranges: bytes\r
434
Content-Length: 35\r
435
Connection: close\r
436
Content-Type: text/plain; charset=UTF-8\r
437
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
438
""", """Bazaar-NG meta directory, format 1
439
""")
440
441
1786.1.26 by John Arbash Meinel
Update and test handle_response.
442
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
443
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
444
Server: Apache/2.0.54 (Fedora)\r
445
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
446
ETag: "238a3c-16ec2-805c5540"\r
447
Accept-Ranges: bytes\r
448
Content-Length: 100\r
1786.1.26 by John Arbash Meinel
Update and test handle_response.
449
Content-Range: bytes 100-199/93890\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
450
Connection: close\r
451
Content-Type: text/plain; charset=UTF-8\r
452
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
453
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
1786.1.26 by John Arbash Meinel
Update and test handle_response.
454
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
455
456
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
457
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
458
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
459
Server: Apache/2.0.54 (Fedora)\r
460
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
461
ETag: "238a3c-16ec2-805c5540"\r
462
Accept-Ranges: bytes\r
463
Content-Length: 100\r
464
Content-Range: bytes 100-199/93890\r
465
Connection: close\r
466
\r
467
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
468
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
469
470
1786.1.26 by John Arbash Meinel
Update and test handle_response.
471
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
472
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
473
Server: Apache/2.0.54 (Fedora)\r
474
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
475
ETag: "238a3c-16ec2-805c5540"\r
476
Accept-Ranges: bytes\r
477
Content-Length: 1534\r
478
Connection: close\r
479
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
480
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
481
\r""", """--418470f848b63279b\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
482
Content-type: text/plain; charset=UTF-8\r
483
Content-range: bytes 0-254/93890\r
484
\r
485
mbp@sourcefrog.net-20050309040815-13242001617e4a06
486
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
487
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
488
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
489
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
490
\r
491
--418470f848b63279b\r
492
Content-type: text/plain; charset=UTF-8\r
493
Content-range: bytes 1000-2049/93890\r
494
\r
495
40-fd4ec249b6b139ab
496
mbp@sourcefrog.net-20050311063625-07858525021f270b
497
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
498
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
499
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
500
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
501
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
502
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
503
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
504
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
505
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
506
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
507
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
508
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
509
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
510
mbp@sourcefrog.net-20050313120651-497bd231b19df600
511
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
512
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
513
mbp@sourcefrog.net-20050314025539-637a636692c055cf
514
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
515
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
516
mbp@source\r
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
517
--418470f848b63279b--\r
518
""")
519
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
520
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
521
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
522
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
523
Server: Apache/2.2.2 (Unix) DAV/2\r
524
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
525
Accept-Ranges: bytes\r
526
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
527
Content-Length: 598\r
528
X-Cache: MISS from localhost.localdomain\r
529
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
530
Proxy-Connection: keep-alive\r
531
\r
532
""",
533
"""\r
534
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
535
Content-Type: text/plain\r
536
Content-Range: bytes 0-99/18672\r
537
\r
538
# bzr knit index 8
539
540
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
541
scott@netsp\r
542
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
543
Content-Type: text/plain\r
544
Content-Range: bytes 300-499/18672\r
545
\r
546
com-20050708231537-2b124b835395399a :
547
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
548
scott@netsplit.com-20050821213706-c86\r
549
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
550
""")
551
552
1786.1.26 by John Arbash Meinel
Update and test handle_response.
553
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
554
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
555
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
556
Server: Apache/2.0.54 (Fedora)\r
557
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
558
ETag: "56691-23-38e9ae00"\r
559
Accept-Ranges: bytes\r
560
Content-Length: 35\r
561
Connection: close\r
562
\r
563
""", """Bazaar-NG meta directory, format 1
564
""")
565
566
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
567
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
568
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
569
Server: Apache/2.0.54 (Fedora)\r
570
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
571
ETag: "56691-23-38e9ae00"\r
572
Accept-Ranges: bytes\r
573
Connection: close\r
574
Content-Type: text/plain; charset=UTF-8\r
575
\r
576
""", """Bazaar-NG meta directory, format 1
577
""")
578
579
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
580
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
581
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
582
Server: Apache/2.0.54 (Fedora)\r
583
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
584
ETag: "238a3c-16ec2-805c5540"\r
585
Accept-Ranges: bytes\r
586
Content-Length: 100\r
587
Connection: close\r
588
\r
589
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
590
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
591
592
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
593
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
594
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
595
Server: Apache/2.0.54 (Fedora)\r
596
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
597
ETag: "238a3c-16ec2-805c5540"\r
598
Accept-Ranges: bytes\r
599
Content-Length: 100\r
600
Content-Range: bytes 100-199/93890\r
601
Connection: close\r
602
Content-Type: text/plain; charset=UTF-8\r
603
\r
604
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
605
606
1786.1.26 by John Arbash Meinel
Update and test handle_response.
607
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
608
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
609
Connection: close\r
610
Content-Type: text/html; charset=iso-8859-1\r
611
\r
612
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
613
<html><head>
614
<title>404 Not Found</title>
615
</head><body>
616
<h1>Not Found</h1>
617
<p>I don't know what I'm doing</p>
618
<hr>
619
</body></html>
620
""")
621
622
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
623
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
624
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
625
Content-Length: 598\r
626
\r
627
""",
628
"""\r
629
--THIS_SEPARATES\r
630
Content-Type: text/plain\r
631
\r
632
# bzr knit index 8
633
--THIS_SEPARATES\r
634
""")
635
636
637
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
638
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
639
Content-Length: 598\r
640
\r
641
""",
642
"""\r
643
--THIS_SEPARATES\r
644
Content-Type: text/plain\r
645
Content-Range: bytes 0-18/18672\r
646
\r
647
# bzr knit index 8
648
649
The range ended at the line above, this text is garbage instead of a boundary
650
line
651
""")
652
653
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
654
class TestHandleResponse(tests.TestCase):
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
655
656
    def _build_HTTPMessage(self, raw_headers):
        """Turn a raw status line plus headers into an httplib.HTTPMessage.

        :param raw_headers: The status line followed by the header lines, as
            a single string.
        :return: The httplib.HTTPMessage built from what follows the status
            line.
        """
        header_stream = StringIO(raw_headers)
        # Get rid of the status line, only the headers go into the message
        header_stream.readline()
        return httplib.HTTPMessage(header_stream)
662
1786.1.26 by John Arbash Meinel
Update and test handle_response.
663
    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: Whatever response.handle_response returns for that
            response, requested from 'http://foo'.
        """
        code, raw_headers, body = a_response
        headers = self._build_HTTPMessage(raw_headers)
        body_stream = StringIO(body)
        return response.handle_response('http://foo', code, headers,
                                        body_stream)
669
670
    def test_full_text(self):
        # Reading everything must give back the body that was supplied
        expected_body = _full_text_response[2]
        out = self.get_response(_full_text_response)
        self.assertEqual(expected_body, out.read())
674
675
    def test_single_range(self):
        expected_body = _single_range_response[2]
        out = self.get_response(_single_range_response)

        # Move to offset 100, then the next 100 bytes must be the body
        out.seek(100)
        actual_body = out.read(100)
        self.assertEqual(expected_body, actual_body)
680
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
681
    def test_single_range_no_content(self):
        # Same checks as for a regular single range, with the fixture that
        # is named as lacking a content type
        expected_body = _single_range_no_content_type[2]
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        actual_body = out.read(100)
        self.assertEqual(expected_body, actual_body)
686
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
687
    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # The headers declare a 100 bytes range but the body supplied by the
        # fixture is shorter, so seeking 51 bytes ahead must fail
        beyond_available = out.tell() + 51
        self.assertRaises(errors.ShortReadvError, out.seek, beyond_available)
691
1786.1.26 by John Arbash Meinel
Update and test handle_response.
692
    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can seek and read without error; what is read is
        # not checked here
        for offset, size in ((0, 255), (1000, 1050)):
            out.seek(offset)
            out.read(size)
701
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
702
    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can seek and read without error; what is read is
        # not checked here
        for offset, size in ((0, 100), (300, 200)):
            out.seek(offset)
            out.read(size)
711
1786.1.26 by John Arbash Meinel
Update and test handle_response.
712
    def test_invalid_response(self):
        # Handling the canned invalid response must be refused
        expected_error = errors.InvalidHttpResponse
        self.assertRaises(expected_error,
                          self.get_response, _invalid_response)
1786.1.26 by John Arbash Meinel
Update and test handle_response.
715
716
    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        canned = _full_text_response_no_content_type
        # get_response unpacks the tuple, builds the message and calls
        # handle_response for 'http://foo'
        out = self.get_response(canned)
        self.assertEqual(canned[2], out.read())
1786.1.26 by John Arbash Meinel
Update and test handle_response.
722
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
723
    def test_full_text_no_content_length(self):
        # The whole body must still be readable for this fixture
        canned = _full_text_response_no_content_length
        # get_response unpacks the tuple, builds the message and calls
        # handle_response for 'http://foo'
        out = self.get_response(canned)
        self.assertEqual(canned[2], out.read())
728
1786.1.26 by John Arbash Meinel
Update and test handle_response.
729
    def test_missing_content_range(self):
        # Handling this single range fixture must be refused
        status, header_text, payload = _single_range_no_content_range
        headers = self._build_HTTPMessage(header_text)
        body_stream = StringIO(payload)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', status, headers, body_stream)
735
736
    def test_multipart_no_content_range(self):
        # The part in this fixture has no Content-Range header, handling the
        # response must be refused
        status, header_text, payload = _multipart_no_content_range
        headers = self._build_HTTPMessage(header_text)
        body_stream = StringIO(payload)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', status, headers, body_stream)
742
743
    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        # Consume the whole range first
        out.read()
        # Going further requires a boundary line, the fixture has garbage
        # there instead, so the seek (offset 1, whence 1) must fail
        expected_error = errors.InvalidHttpResponse
        self.assertRaises(expected_error, out.seek, 1, 1)