1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Tests for HTTP response parsing.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
34
- the expected pattern of use is either seek(offset)+read(size) or a single
35
read with no size specified. For multiple range files, multiple read() will
36
return the corresponding ranges, trying to read further will raise
40
from cStringIO import StringIO
47
from bzrlib.transport.http import response
50
class TestRangeFileMixin(object):
51
"""Tests for accessing the first range in a RangeFile."""
53
# A simple string used to represent a file part (also called a range), in
54
# which offsets are easy to calculate for test writers. It's used as a
55
# building block with slight variations but basically 'a' is the first char
56
# of the range and 'z' is the last.
57
alpha = 'abcdefghijklmnopqrstuvwxyz'
59
def test_can_read_at_first_access(self):
60
"""Test that the just created file can be read."""
61
self.assertEquals(self.alpha, self._file.read())
63
def test_seek_read(self):
64
"""Test seek/read inside the range."""
66
start = self.first_range_start
67
# Before any use, tell() should be at the range start
68
self.assertEquals(start, f.tell())
69
cur = start # For an overall offset assertion
72
self.assertEquals('def', f.read(3))
76
self.assertEquals('klmn', f.read(4))
78
# read(0) in the middle of a range
79
self.assertEquals('', f.read(0))
83
self.assertEquals(here, f.tell())
84
self.assertEquals(cur, f.tell())
86
def test_read_zero(self):
88
start = self.first_range_start
89
self.assertEquals('', f.read(0))
91
self.assertEquals('', f.read(0))
93
def test_seek_at_range_end(self):
97
def test_read_at_range_end(self):
98
"""Test read behaviour at range end."""
100
self.assertEquals(self.alpha, f.read())
101
self.assertEquals('', f.read(0))
102
self.assertRaises(errors.InvalidRange, f.read, 1)
104
def test_unbounded_read_after_seek(self):
107
# Should not cross ranges
108
self.assertEquals('yz', f.read())
110
def test_seek_backwards(self):
112
start = self.first_range_start
115
self.assertRaises(errors.InvalidRange, f.seek, start + 5)
117
def test_seek_outside_single_range(self):
119
if f._size == -1 or f._boundary is not None:
120
raise tests.TestNotApplicable('Needs a fully defined range')
121
# Will seek past the range and then errors out
122
self.assertRaises(errors.InvalidRange,
123
f.seek, self.first_range_start + 27)
125
def test_read_past_end_of_range(self):
128
raise tests.TestNotApplicable("Can't check an unknown size")
129
start = self.first_range_start
131
self.assertRaises(errors.InvalidRange, f.read, 10)
133
def test_seek_from_end(self):
134
"""Test seeking from the end of the file.
136
The semantics are unclear in case of multiple ranges. Seeking from end
137
exists only for the http transports, cannot be used if the file size is
138
unknown and is not used in bzrlib itself. This test must be (and is)
139
overridden by daughter classes.
141
Reading from end makes sense only when a range has been requested from
142
the end of the file (see HttpTransportBase._get() when using the
143
'tail_amount' parameter). The HTTP response can only be a whole file or
148
self.assertEquals('yz', f.read())
151
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
152
"""Test a RangeFile for a whole file whose size is not known."""
155
super(TestRangeFileSizeUnknown, self).setUp()
156
self._file = response.RangeFile('Whole_file_size_known',
157
StringIO(self.alpha))
158
# We define no range, relying on RangeFile to provide default values
159
self.first_range_start = 0 # It's the whole file
161
def test_seek_from_end(self):
162
"""See TestRangeFileMixin.test_seek_from_end.
164
The end of the file can't be determined since the size is unknown.
166
self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)
168
def test_read_at_range_end(self):
169
"""Test read behaviour at range end."""
171
self.assertEquals(self.alpha, f.read())
172
self.assertEquals('', f.read(0))
173
self.assertEquals('', f.read(1))
175
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
176
"""Test a RangeFile for a whole file whose size is known."""
179
super(TestRangeFileSizeKnown, self).setUp()
180
self._file = response.RangeFile('Whole_file_size_known',
181
StringIO(self.alpha))
182
self._file.set_range(0, len(self.alpha))
183
self.first_range_start = 0 # It's the whole file
186
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
187
"""Test a RangeFile for a single range."""
190
super(TestRangeFileSingleRange, self).setUp()
191
self._file = response.RangeFile('Single_range_file',
192
StringIO(self.alpha))
193
self.first_range_start = 15
194
self._file.set_range(self.first_range_start, len(self.alpha))
197
def test_read_before_range(self):
198
# This can't occur under normal circumstances, we have to force it
200
f._pos = 0 # Force an invalid pos
201
self.assertRaises(errors.InvalidRange, f.read, 2)
203
class TestRangeFilMultipleRanges(tests.TestCase, TestRangeFileMixin):
204
"""Test a RangeFile for multiple ranges.
206
The RangeFile used for the tests contains three ranges:
208
- at offset 25: alpha
209
- at offset 100: alpha
210
- at offset 126: alpha.upper()
212
The two last ranges are contiguous. This only rarely occurs (should not in
213
fact) in real uses but may lead to hard to track bugs.
217
super(TestRangeFilMultipleRanges, self).setUp()
219
boundary = 'separation'
222
self.first_range_start = 25
223
file_size = 200 # big enough to encompass all ranges
224
for (start, part) in [(self.first_range_start, self.alpha),
225
# Two contiguous ranges
227
(126, self.alpha.upper())]:
228
content += self._multipart_byterange(part, start, boundary,
231
content += self._boundary_line(boundary)
233
self._file = response.RangeFile('Multiple_ranges_file',
235
# Ranges are set by decoding the range headers, the RangeFile user is
236
# supposed to call the following before using seek or read since it
237
# requires knowing the *response* headers (in that case the boundary
238
# which is part of the Content-Type header).
239
self._file.set_boundary(boundary)
241
def _boundary_line(self, boundary):
242
"""Helper to build the formatted boundary line."""
243
return '--' + boundary + '\r\n'
245
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
246
"""Encode a part of a file as a multipart/byterange MIME type.
248
When a range request is issued, the HTTP response body can be
249
decomposed in parts, each one representing a range (start, size) in a
252
:param data: The payload.
253
:param offset: where data starts in the file
254
:param boundary: used to separate the parts
255
:param file_size: the size of the file containing the range (default to
258
:return: a string containing the data encoded as it will appear in the
261
bline = self._boundary_line(boundary)
262
# Each range begins with a boundary line
264
# A range is described by a set of headers, but only 'Content-Range' is
265
# required for our implementation (TestHandleResponse below will
266
# exercise ranges with multiple or missing headers)
267
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
271
# Finally the raw bytes
275
def test_read_all_ranges(self):
277
self.assertEquals(self.alpha, f.read()) # Read first range
278
f.seek(100) # Trigger the second range recognition
279
self.assertEquals(self.alpha, f.read()) # Read second range
280
self.assertEquals(126, f.tell())
281
f.seek(126) # Start of third range which is also the current pos !
282
self.assertEquals('A', f.read(1))
284
self.assertEquals('LMN', f.read(3))
286
def test_seek_from_end(self):
287
"""See TestRangeFileMixin.test_seek_from_end."""
288
# The actual implementation will seek from end for the first range only
289
# and then fail. Since seeking from end is intended to be used for a
290
# single range only anyway, this test just documents the actual
294
self.assertEquals('yz', f.read())
295
self.assertRaises(errors.InvalidRange, f.seek, -2, 2)
297
def test_seek_into_void(self):
299
start = self.first_range_start
301
# Seeking to a point between two ranges is possible (only once) but
302
# reading there is forbidden
304
# We crossed a range boundary, so now the file is positioned at the
305
# start of the new range (i.e. trying to seek below 100 will error out)
309
def test_seek_across_ranges(self):
311
start = self.first_range_start
312
f.seek(126) # skip the two first ranges
313
self.assertEquals('AB', f.read(2))
315
def test_seek_twice_between_ranges(self):
317
start = self.first_range_start
318
f.seek(start + 40) # Past the first range but before the second
319
# Now the file is positioned at the second range start (100)
320
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
322
def test_seek_at_range_end(self):
323
"""Test seek behavior at range end."""
329
def test_read_at_range_end(self):
331
self.assertEquals(self.alpha, f.read())
332
self.assertEquals(self.alpha, f.read())
333
self.assertEquals(self.alpha.upper(), f.read())
334
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
337
class TestRangeFileVarious(tests.TestCase):
338
"""Tests RangeFile aspects not covered elsewhere."""
340
def test_seek_whence(self):
341
"""Test the seek whence parameter values."""
342
f = response.RangeFile('foo', StringIO('abc'))
347
self.assertRaises(ValueError, f.seek, 0, 14)
349
def test_range_syntax(self):
350
"""Test the Content-Range scanning."""
352
f = response.RangeFile('foo', StringIO())
354
def ok(expected, header_value):
355
f.set_range_from_header(header_value)
356
# Slightly peek under the covers to get the size
357
self.assertEquals(expected, (f.tell(), f._size))
359
ok((1, 10), 'bytes 1-10/11')
360
ok((1, 10), 'bytes 1-10/*')
361
ok((12, 2), '\tbytes 12-13/*')
362
ok((28, 1), ' bytes 28-28/*')
363
ok((2123, 2120), 'bytes 2123-4242/12310')
364
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
366
def nok(header_value):
367
self.assertRaises(errors.InvalidHttpRange,
368
f.set_range_from_header, header_value)
372
nok('bytes xx-yyy/zzz')
373
nok('bytes xx-12/zzz')
374
nok('bytes 11-yy/zzz')
378
# Taken from real request responses
379
_full_text_response = (200, """HTTP/1.1 200 OK\r
380
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
381
Server: Apache/2.0.54 (Fedora)\r
382
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
383
ETag: "56691-23-38e9ae00"\r
384
Accept-Ranges: bytes\r
387
Content-Type: text/plain; charset=UTF-8\r
389
""", """Bazaar-NG meta directory, format 1
393
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
394
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
395
Server: Apache/2.0.54 (Fedora)\r
396
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
397
ETag: "238a3c-16ec2-805c5540"\r
398
Accept-Ranges: bytes\r
399
Content-Length: 100\r
400
Content-Range: bytes 100-199/93890\r
402
Content-Type: text/plain; charset=UTF-8\r
404
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
405
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
408
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
409
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
410
Server: Apache/2.0.54 (Fedora)\r
411
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
412
ETag: "238a3c-16ec2-805c5540"\r
413
Accept-Ranges: bytes\r
414
Content-Length: 100\r
415
Content-Range: bytes 100-199/93890\r
418
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
419
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
422
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
423
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
424
Server: Apache/2.0.54 (Fedora)\r
425
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
426
ETag: "238a3c-16ec2-805c5540"\r
427
Accept-Ranges: bytes\r
428
Content-Length: 1534\r
430
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
432
\r""", """--418470f848b63279b\r
433
Content-type: text/plain; charset=UTF-8\r
434
Content-range: bytes 0-254/93890\r
436
mbp@sourcefrog.net-20050309040815-13242001617e4a06
437
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
438
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
439
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
440
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
442
--418470f848b63279b\r
443
Content-type: text/plain; charset=UTF-8\r
444
Content-range: bytes 1000-2049/93890\r
447
mbp@sourcefrog.net-20050311063625-07858525021f270b
448
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
449
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
450
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
451
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
452
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
453
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
454
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
455
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
456
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
457
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
458
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
459
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
460
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
461
mbp@sourcefrog.net-20050313120651-497bd231b19df600
462
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
463
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
464
mbp@sourcefrog.net-20050314025539-637a636692c055cf
465
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
466
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
468
--418470f848b63279b--\r
472
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
473
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
474
Server: Apache/2.2.2 (Unix) DAV/2\r
475
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
476
Accept-Ranges: bytes\r
477
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
478
Content-Length: 598\r
479
X-Cache: MISS from localhost.localdomain\r
480
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
481
Proxy-Connection: keep-alive\r
485
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
486
Content-Type: text/plain\r
487
Content-Range: bytes 0-99/18672\r
491
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863 :
493
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
494
Content-Type: text/plain\r
495
Content-Range: bytes 300-499/18672\r
497
com-20050708231537-2b124b835395399a :
498
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
499
scott@netsplit.com-20050821213706-c86\r
500
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
505
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
506
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
507
Server: Apache/2.0.54 (Fedora)\r
508
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
509
ETag: "56691-23-38e9ae00"\r
510
Accept-Ranges: bytes\r
514
""", """Bazaar-NG meta directory, format 1
518
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
519
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
520
Server: Apache/2.0.54 (Fedora)\r
521
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
522
ETag: "56691-23-38e9ae00"\r
523
Accept-Ranges: bytes\r
525
Content-Type: text/plain; charset=UTF-8\r
527
""", """Bazaar-NG meta directory, format 1
531
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
532
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
533
Server: Apache/2.0.54 (Fedora)\r
534
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
535
ETag: "238a3c-16ec2-805c5540"\r
536
Accept-Ranges: bytes\r
537
Content-Length: 100\r
540
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
541
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
544
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
545
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
546
Server: Apache/2.0.54 (Fedora)\r
547
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
548
ETag: "238a3c-16ec2-805c5540"\r
549
Accept-Ranges: bytes\r
550
Content-Length: 100\r
551
Content-Range: bytes 100-199/93890\r
553
Content-Type: text/plain; charset=UTF-8\r
555
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
558
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
559
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
561
Content-Type: text/html; charset=iso-8859-1\r
563
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
565
<title>404 Not Found</title>
568
<p>I don't know what I'm doing</p>
574
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
575
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
576
Content-Length: 598\r
581
Content-Type: text/plain\r
588
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
589
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
590
Content-Length: 598\r
595
Content-Type: text/plain\r
596
Content-Range: bytes 0-18/18672\r
600
The range ended at the line above, this text is garbage instead of a boundary
605
class TestHandleResponse(tests.TestCase):
607
def _build_HTTPMessage(self, raw_headers):
608
status_and_headers = StringIO(raw_headers)
609
# Get rid of the status line
610
status_and_headers.readline()
611
msg = httplib.HTTPMessage(status_and_headers)
614
def get_response(self, a_response):
    """Process a supplied response, and return the result.

    :param a_response: a (code, raw_headers, body) tuple
    :return: whatever response.handle_response() returns when given the
        parsed headers and the body wrapped in a StringIO
    """
    code, raw_headers, body = a_response
    msg = self._build_HTTPMessage(raw_headers)
    # Use the unpacked body rather than indexing a_response a second time
    return response.handle_response('http://foo', code, msg,
                                    StringIO(body))
621
def test_full_text(self):
    """A full (200) response gives back the complete body on read()."""
    out = self.get_response(_full_text_response)
    # get_response feeds the body through a StringIO; a single unbounded
    # read must hand all of it back unchanged
    self.assertEqual(_full_text_response[2], out.read())
626
def test_single_range(self):
627
out = self.get_response(_single_range_response)
630
self.assertEqual(_single_range_response[2], out.read(100))
632
def test_single_range_no_content(self):
633
out = self.get_response(_single_range_no_content_type)
636
self.assertEqual(_single_range_no_content_type[2], out.read(100))
638
def test_single_range_truncated(self):
    """Seeking beyond the data actually received raises ShortReadvError."""
    out = self.get_response(_single_range_response_truncated)
    # Content-Range declares 100 but only 51 present
    self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)
643
def test_multi_range(self):
644
out = self.get_response(_multipart_range_response)
646
# Just make sure we can read the right contents
653
def test_multi_squid_range(self):
654
out = self.get_response(_multipart_squid_range_response)
656
# Just make sure we can read the right contents
663
def test_invalid_response(self):
    """The 444 'Bad Response' fixture is rejected as InvalidHttpResponse."""
    self.assertRaises(errors.InvalidHttpResponse,
                      self.get_response, _invalid_response)
667
def test_full_text_no_content_type(self):
    """A full response without a Content-Type header is still readable."""
    # We should not require Content-Type for a full response
    out = self.get_response(_full_text_response_no_content_type)
    # The fixture is a (code, raw_headers, body) tuple; index 2 is the body
    self.assertEqual(_full_text_response_no_content_type[2], out.read())
674
def test_full_text_no_content_length(self):
    """A full response without a Content-Length header is still readable."""
    out = self.get_response(_full_text_response_no_content_length)
    # The fixture is a (code, raw_headers, body) tuple; index 2 is the body
    self.assertEqual(_full_text_response_no_content_length[2], out.read())
680
def test_missing_content_range(self):
    """A single range response lacking Content-Range is rejected."""
    code, raw_headers, body = _single_range_no_content_range
    msg = self._build_HTTPMessage(raw_headers)
    # handle_response is invoked directly so that only that call is
    # covered by assertRaises
    self.assertRaises(errors.InvalidHttpResponse,
                      response.handle_response,
                      'http://bogus', code, msg, StringIO(body))
687
def test_multipart_no_content_range(self):
    """A multipart response whose part lacks Content-Range is rejected."""
    code, raw_headers, body = _multipart_no_content_range
    msg = self._build_HTTPMessage(raw_headers)
    # handle_response is invoked directly so that only that call is
    # covered by assertRaises
    self.assertRaises(errors.InvalidHttpResponse,
                      response.handle_response,
                      'http://bogus', code, msg, StringIO(body))
694
def test_multipart_no_boundary(self):
695
out = self.get_response(_multipart_no_boundary)
696
out.read() # Read the whole range
697
# Fail to find the boundary line
698
self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)