37
37
InvalidHttpResponse.
40
from cStringIO import StringIO
41
import http.client as http_client
42
except ImportError: # python < 3 without future
43
import httplib as http_client
46
parse_headers = http_client.parse_headers
47
except AttributeError: # python 2
48
parse_headers = http_client.HTTPMessage
47
from bzrlib.transport.http import (
54
from ..sixish import (
58
from ..transport.http import (
51
from bzrlib.tests.file_utils import (
62
from .file_utils import (
57
68
"""A socket-like object that can be given a predefined content."""
59
70
def __init__(self, data):
60
self.readfile = StringIO(data)
71
self.readfile = BytesIO(data)
62
73
def makefile(self, mode='r', bufsize=None):
63
74
return self.readfile
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
77
class FakeHTTPConnection(HTTPConnection):
68
79
def __init__(self, sock):
69
_urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
80
HTTPConnection.__init__(self, 'localhost')
70
81
# Set the socket to bypass the connection
89
class TestResponseFileIter(tests.TestCase):
    """Check what iterating over a response.ResponseFile produces."""

    def test_iter_empty(self):
        """A ResponseFile wrapping an empty BytesIO iterates to nothing."""
        empty_file = response.ResponseFile('empty', BytesIO())
        collected = list(empty_file)
        self.assertEqual([], collected)

    def test_iter_many(self):
        """Each iteration step gives one line, trailing newline included."""
        content = b'0\n1\nboo!\n'
        many_file = response.ResponseFile('many', BytesIO(content))
        collected = list(many_file)
        self.assertEqual([b'0\n', b'1\n', b'boo!\n'], collected)
78
100
class TestHTTPConnection(tests.TestCase):
80
102
def test_cleanup_pipe(self):
81
sock = ReadSocket("""HTTP/1.1 200 OK\r
103
sock = ReadSocket(b"""HTTP/1.1 200 OK\r
82
104
Content-Type: text/plain; charset=UTF-8\r
83
105
Content-Length: 18
92
114
# Now, get the response
93
115
resp = conn.getresponse()
94
116
# Read part of the response
95
self.assertEquals('0123456789\n', resp.read(11))
117
self.assertEqual(b'0123456789\n', resp.read(11))
96
118
# Override the threshold to force the warning emission
97
conn._range_warning_thresold = 6 # There are 7 bytes pending
119
conn._range_warning_thresold = 6 # There are 7 bytes pending
98
120
conn.cleanup_pipe()
99
121
self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
106
128
# which offsets are easy to calculate for test writers. It's used as a
107
129
# building block with slight variations but basically 'a' is the first char
108
130
# of the range and 'z' is the last.
109
alpha = 'abcdefghijklmnopqrstuvwxyz'
131
alpha = b'abcdefghijklmnopqrstuvwxyz'
111
133
def test_can_read_at_first_access(self):
112
134
"""Test that the just created file can be read."""
113
self.assertEquals(self.alpha, self._file.read())
135
self.assertEqual(self.alpha, self._file.read())
115
137
def test_seek_read(self):
116
138
"""Test seek/read inside the range."""
118
140
start = self.first_range_start
119
141
# Before any use, tell() should be at the range start
120
self.assertEquals(start, f.tell())
121
cur = start # For an overall offset assertion
142
self.assertEqual(start, f.tell())
143
cur = start # For an overall offset assertion
122
144
f.seek(start + 3)
124
self.assertEquals('def', f.read(3))
146
self.assertEqual(b'def', f.read(3))
125
147
cur += len('def')
128
self.assertEquals('klmn', f.read(4))
150
self.assertEqual(b'klmn', f.read(4))
129
151
cur += len('klmn')
130
152
# read(0) in the middle of a range
131
self.assertEquals('', f.read(0))
153
self.assertEqual(b'', f.read(0))
135
self.assertEquals(here, f.tell())
136
self.assertEquals(cur, f.tell())
157
self.assertEqual(here, f.tell())
158
self.assertEqual(cur, f.tell())
138
160
def test_read_zero(self):
140
start = self.first_range_start
141
self.assertEquals('', f.read(0))
162
self.assertEqual(b'', f.read(0))
143
self.assertEquals('', f.read(0))
164
self.assertEqual(b'', f.read(0))
145
166
def test_seek_at_range_end(self):
149
170
def test_read_at_range_end(self):
150
171
"""Test read behaviour at range end."""
152
self.assertEquals(self.alpha, f.read())
153
self.assertEquals('', f.read(0))
173
self.assertEqual(self.alpha, f.read())
174
self.assertEqual(b'', f.read(0))
154
175
self.assertRaises(errors.InvalidRange, f.read, 1)
156
177
def test_unbounded_read_after_seek(self):
159
180
# Should not cross ranges
160
self.assertEquals('yz', f.read())
181
self.assertEqual(b'yz', f.read())
162
183
def test_seek_backwards(self):
183
204
self.assertRaises(errors.InvalidRange, f.read, 10)
185
206
def test_seek_from_end(self):
186
"""Test seeking from the end of the file.
188
The semantic is unclear in case of multiple ranges. Seeking from end
189
exists only for the http transports, cannot be used if the file size is
190
unknown and is not used in bzrlib itself. This test must be (and is)
191
overridden by daughter classes.
193
Reading from end makes sense only when a range has been requested from
194
the end of the file (see HttpTransportBase._get() when using the
195
'tail_amount' parameter). The HTTP response can only be a whole file or
200
self.assertEquals('yz', f.read())
207
"""Test seeking from the end of the file.
209
The semantic is unclear in case of multiple ranges. Seeking from end
210
exists only for the http transports, cannot be used if the file size is
211
unknown and is not used in breezy itself. This test must be (and is)
212
overridden by daughter classes.
214
Reading from end makes sense only when a range has been requested from
215
the end of the file (see HttpTransportBase._get() when using the
216
'tail_amount' parameter). The HTTP response can only be a whole file or
221
self.assertEqual(b'yz', f.read())
203
224
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
207
228
super(TestRangeFileSizeUnknown, self).setUp()
208
229
self._file = response.RangeFile('Whole_file_size_known',
209
StringIO(self.alpha))
210
231
# We define no range, relying on RangeFile to provide default values
211
self.first_range_start = 0 # It's the whole file
232
self.first_range_start = 0 # It's the whole file
213
234
def test_seek_from_end(self):
214
235
"""See TestRangeFileMixin.test_seek_from_end.
220
241
def test_read_at_range_end(self):
221
242
"""Test read behaviour at range end."""
223
self.assertEquals(self.alpha, f.read())
224
self.assertEquals('', f.read(0))
225
self.assertEquals('', f.read(1))
244
self.assertEqual(self.alpha, f.read())
245
self.assertEqual(b'', f.read(0))
246
self.assertEqual(b'', f.read(1))
228
249
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
232
253
super(TestRangeFileSizeKnown, self).setUp()
233
254
self._file = response.RangeFile('Whole_file_size_known',
234
StringIO(self.alpha))
235
256
self._file.set_range(0, len(self.alpha))
236
self.first_range_start = 0 # It's the whole file
257
self.first_range_start = 0 # It's the whole file
239
260
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
243
264
super(TestRangeFileSingleRange, self).setUp()
244
265
self._file = response.RangeFile('Single_range_file',
245
StringIO(self.alpha))
246
267
self.first_range_start = 15
247
268
self._file.set_range(self.first_range_start, len(self.alpha))
250
270
def test_read_before_range(self):
251
271
# This can't occur under normal circumstances, we have to force it
253
f._pos = 0 # Force an invalid pos
273
f._pos = 0 # Force an invalid pos
254
274
self.assertRaises(errors.InvalidRange, f.read, 2)
271
291
# in HTTP response headers and the boundary lines that separate
272
292
# multipart content.
274
boundary = "separation"
294
boundary = b"separation"
277
297
super(TestRangeFileMultipleRanges, self).setUp()
279
299
boundary = self.boundary
282
302
self.first_range_start = 25
283
file_size = 200 # big enough to encompass all ranges
303
file_size = 200 # big enough to encompass all ranges
284
304
for (start, part) in [(self.first_range_start, self.alpha),
285
305
# Two contiguous ranges
286
306
(100, self.alpha),
291
311
content += self._boundary_line()
293
313
self._file = response.RangeFile('Multiple_ranges_file',
295
315
self.set_file_boundary()
297
317
def _boundary_line(self):
298
318
"""Helper to build the formatted boundary line."""
299
return '--' + self.boundary + '\r\n'
319
return b'--' + self.boundary + b'\r\n'
301
321
def set_file_boundary(self):
302
322
# Ranges are set by decoding the range headers, the RangeFile user is
305
325
# which is part of the Content-Type header).
306
326
self._file.set_boundary(self.boundary)
308
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
328
def _multipart_byterange(self, data, offset, boundary, file_size=b'*'):
309
329
"""Encode a part of a file as a multipart/byterange MIME type.
311
331
When a range request is issued, the HTTP response body can be
327
347
# A range is described by a set of headers, but only 'Content-Range' is
328
348
# required for our implementation (TestHandleResponse below will
329
349
# exercise ranges with multiple or missing headers)
330
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
350
if isinstance(file_size, int):
351
file_size = b'%d' % file_size
352
range += b'Content-Range: bytes %d-%d/%s\r\n' % (offset,
334
357
# Finally the raw bytes
338
361
def test_read_all_ranges(self):
340
self.assertEquals(self.alpha, f.read()) # Read first range
341
f.seek(100) # Trigger the second range recognition
342
self.assertEquals(self.alpha, f.read()) # Read second range
343
self.assertEquals(126, f.tell())
344
f.seek(126) # Start of third range which is also the current pos !
345
self.assertEquals('A', f.read(1))
363
self.assertEqual(self.alpha, f.read()) # Read first range
364
f.seek(100) # Trigger the second range recognition
365
self.assertEqual(self.alpha, f.read()) # Read second range
366
self.assertEqual(126, f.tell())
367
f.seek(126) # Start of third range which is also the current pos !
368
self.assertEqual(b'A', f.read(1))
347
self.assertEquals('LMN', f.read(3))
370
self.assertEqual(b'LMN', f.read(3))
349
372
def test_seek_from_end(self):
350
373
"""See TestRangeFileMixin.test_seek_from_end."""
372
395
def test_seek_across_ranges(self):
374
start = self.first_range_start
375
f.seek(126) # skip the two first ranges
376
self.assertEquals('AB', f.read(2))
397
f.seek(126) # skip the two first ranges
398
self.assertEqual(b'AB', f.read(2))
378
400
def test_checked_read_dont_overflow_buffers(self):
380
start = self.first_range_start
381
402
# We force a very low value to exercise all code paths in _checked_read
382
403
f._discarded_buf_size = 8
383
f.seek(126) # skip the two first ranges
384
self.assertEquals('AB', f.read(2))
404
f.seek(126) # skip the two first ranges
405
self.assertEqual(b'AB', f.read(2))
386
407
def test_seek_twice_between_ranges(self):
388
409
start = self.first_range_start
389
f.seek(start + 40) # Past the first range but before the second
410
f.seek(start + 40) # Past the first range but before the second
390
411
# Now the file is positioned at the second range start (100)
391
412
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
400
421
def test_read_at_range_end(self):
402
self.assertEquals(self.alpha, f.read())
403
self.assertEquals(self.alpha, f.read())
404
self.assertEquals(self.alpha.upper(), f.read())
423
self.assertEqual(self.alpha, f.read())
424
self.assertEqual(self.alpha, f.read())
425
self.assertEqual(self.alpha.upper(), f.read())
405
426
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
423
444
# The boundary as it appears in boundary lines
424
445
# IIS 6 and 7 use this value
425
_boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
426
boundary = '<' + _boundary_trimmed + '>'
446
_boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
447
boundary = b'<' + _boundary_trimmed + b'>'
428
449
def set_file_boundary(self):
429
450
# Emulate broken rfc822.unquote() here by removing angles
445
466
def test_range_syntax(self):
446
467
"""Test the Content-Range scanning."""
448
f = response.RangeFile('foo', StringIO())
469
f = response.RangeFile('foo', BytesIO())
450
471
def ok(expected, header_value):
451
472
f.set_range_from_header(header_value)
452
473
# Slightly peek under the covers to get the size
453
self.assertEquals(expected, (f.tell(), f._size))
474
self.assertEqual(expected, (f.tell(), f._size))
455
476
ok((1, 10), 'bytes 1-10/11')
456
477
ok((1, 10), 'bytes 1-10/*')
457
478
ok((12, 2), '\tbytes 12-13/*')
458
479
ok((28, 1), ' bytes 28-28/*')
459
480
ok((2123, 2120), 'bytes 2123-4242/12310')
460
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
481
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
462
483
def nok(header_value):
463
484
self.assertRaises(errors.InvalidHttpRange,
474
495
# Taken from real request responses
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
496
_full_text_response = (200, b"""HTTP/1.1 200 OK\r
476
497
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
477
498
Server: Apache/2.0.54 (Fedora)\r
478
499
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
482
503
Connection: close\r
483
504
Content-Type: text/plain; charset=UTF-8\r
485
""", """Bazaar-NG meta directory, format 1
506
""", b"""Bazaar-NG meta directory, format 1
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
510
_single_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
490
511
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
512
Server: Apache/2.0.54 (Fedora)\r
492
513
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
497
518
Connection: close\r
498
519
Content-Type: text/plain; charset=UTF-8\r
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
521
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
501
522
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
525
_single_range_no_content_type = (206, b"""HTTP/1.1 206 Partial Content\r
505
526
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
506
527
Server: Apache/2.0.54 (Fedora)\r
507
528
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
511
532
Content-Range: bytes 100-199/93890\r
512
533
Connection: close\r
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
535
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
536
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
539
_multipart_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
519
540
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
520
541
Server: Apache/2.0.54 (Fedora)\r
521
542
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
607
628
Content-Length: 35\r
608
629
Connection: close\r
610
""", """Bazaar-NG meta directory, format 1
631
""", b"""Bazaar-NG meta directory, format 1
614
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
635
_full_text_response_no_content_length = (200, b"""HTTP/1.1 200 OK\r
615
636
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
616
637
Server: Apache/2.0.54 (Fedora)\r
617
638
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
620
641
Connection: close\r
621
642
Content-Type: text/plain; charset=UTF-8\r
623
""", """Bazaar-NG meta directory, format 1
644
""", b"""Bazaar-NG meta directory, format 1
627
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
648
_single_range_no_content_range = (206, b"""HTTP/1.1 206 Partial Content\r
628
649
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
629
650
Server: Apache/2.0.54 (Fedora)\r
630
651
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
633
654
Content-Length: 100\r
634
655
Connection: close\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
657
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
637
658
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
640
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
661
_single_range_response_truncated = (206, b"""HTTP/1.1 206 Partial Content\r
641
662
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
642
663
Server: Apache/2.0.54 (Fedora)\r
643
664
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
648
669
Connection: close\r
649
670
Content-Type: text/plain; charset=UTF-8\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
654
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
672
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
675
_invalid_response = (444, b"""HTTP/1.1 444 Bad Response\r
655
676
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
656
677
Connection: close\r
657
678
Content-Type: text/html; charset=iso-8859-1\r
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
680
""", b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
661
682
<title>404 Not Found</title>
701
722
class TestHandleResponse(tests.TestCase):
703
724
def _build_HTTPMessage(self, raw_headers):
704
status_and_headers = StringIO(raw_headers)
725
status_and_headers = BytesIO(raw_headers)
705
726
# Get rid of the status line
706
727
status_and_headers.readline()
707
msg = httplib.HTTPMessage(status_and_headers)
728
msg = parse_headers(status_and_headers)
710
734
def get_response(self, a_response):
711
735
"""Process a supplied response, and return the result."""
712
736
code, raw_headers, body = a_response
713
msg = self._build_HTTPMessage(raw_headers)
714
return response.handle_response('http://foo', code, msg,
715
StringIO(a_response[2]))
737
getheader = self._build_HTTPMessage(raw_headers)
738
return response.handle_response(
739
'http://foo', code, getheader, BytesIO(a_response[2]))
717
741
def test_full_text(self):
718
742
out = self.get_response(_full_text_response)
719
# It is a StringIO from the original data
743
# It is a BytesIO from the original data
720
744
self.assertEqual(_full_text_response[2], out.read())
722
746
def test_single_range(self):
763
787
def test_full_text_no_content_type(self):
764
788
# We should not require Content-Type for a full response
765
789
code, raw_headers, body = _full_text_response_no_content_type
766
msg = self._build_HTTPMessage(raw_headers)
767
out = response.handle_response('http://foo', code, msg, StringIO(body))
790
getheader = self._build_HTTPMessage(raw_headers)
791
out = response.handle_response(
792
'http://foo', code, getheader, BytesIO(body))
768
793
self.assertEqual(body, out.read())
770
795
def test_full_text_no_content_length(self):
771
796
code, raw_headers, body = _full_text_response_no_content_length
772
msg = self._build_HTTPMessage(raw_headers)
773
out = response.handle_response('http://foo', code, msg, StringIO(body))
797
getheader = self._build_HTTPMessage(raw_headers)
798
out = response.handle_response(
799
'http://foo', code, getheader, BytesIO(body))
774
800
self.assertEqual(body, out.read())
776
802
def test_missing_content_range(self):
777
803
code, raw_headers, body = _single_range_no_content_range
778
msg = self._build_HTTPMessage(raw_headers)
804
getheader = self._build_HTTPMessage(raw_headers)
779
805
self.assertRaises(errors.InvalidHttpResponse,
780
806
response.handle_response,
781
'http://bogus', code, msg, StringIO(body))
807
'http://bogus', code, getheader, BytesIO(body))
783
809
def test_multipart_no_content_range(self):
784
810
code, raw_headers, body = _multipart_no_content_range
785
msg = self._build_HTTPMessage(raw_headers)
811
getheader = self._build_HTTPMessage(raw_headers)
786
812
self.assertRaises(errors.InvalidHttpResponse,
787
813
response.handle_response,
788
'http://bogus', code, msg, StringIO(body))
814
'http://bogus', code, getheader, BytesIO(body))
790
816
def test_multipart_no_boundary(self):
791
817
out = self.get_response(_multipart_no_boundary)
803
tests.TestCase.setUp(self)
829
super(TestRangeFileSizeReadLimited, self).setUp()
804
830
# create a test datablock larger than _max_read_size.
805
831
chunk_size = response.RangeFile._max_read_size
806
test_pattern = '0123456789ABCDEF'
807
self.test_data = test_pattern * (3 * chunk_size / len(test_pattern))
832
test_pattern = b'0123456789ABCDEF'
833
self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
808
834
self.test_data_len = len(self.test_data)
810
836
def test_max_read_size(self):