37
37
InvalidHttpResponse.
40
from cStringIO import StringIO
41
import http.client as http_client
42
except ImportError: # python < 3 without future
43
import httplib as http_client
46
parse_headers = http_client.parse_headers
47
except AttributeError: # python 2
48
parse_headers = http_client.HTTPMessage
47
from bzrlib.transport.http import (
54
from ..sixish import (
58
from ..transport.http import (
51
from bzrlib.tests.file_utils import (
62
from .file_utils import (
57
68
"""A socket-like object that can be given a predefined content."""
59
70
def __init__(self, data):
60
self.readfile = StringIO(data)
71
self.readfile = BytesIO(data)
62
73
def makefile(self, mode='r', bufsize=None):
63
74
return self.readfile
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
77
class FakeHTTPConnection(HTTPConnection):
68
79
def __init__(self, sock):
69
_urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
80
HTTPConnection.__init__(self, 'localhost')
70
81
# Set the socket to bypass the connection
89
class TestResponseFileIter(tests.TestCase):
91
def test_iter_empty(self):
92
f = response.ResponseFile('empty', BytesIO())
93
self.assertEqual([], list(f))
95
def test_iter_many(self):
96
f = response.ResponseFile('many', BytesIO(b'0\n1\nboo!\n'))
97
self.assertEqual([b'0\n', b'1\n', b'boo!\n'], list(f))
99
def test_readlines(self):
100
f = response.ResponseFile('many', BytesIO(b'0\n1\nboo!\n'))
101
self.assertEqual([b'0\n', b'1\n', b'boo!\n'], f.readlines())
78
104
class TestHTTPConnection(tests.TestCase):
80
106
def test_cleanup_pipe(self):
81
sock = ReadSocket("""HTTP/1.1 200 OK\r
107
sock = ReadSocket(b"""HTTP/1.1 200 OK\r
82
108
Content-Type: text/plain; charset=UTF-8\r
83
109
Content-Length: 18
92
118
# Now, get the response
93
119
resp = conn.getresponse()
94
120
# Read part of the response
95
self.assertEquals('0123456789\n', resp.read(11))
121
self.assertEqual(b'0123456789\n', resp.read(11))
96
122
# Override the thresold to force the warning emission
97
conn._range_warning_thresold = 6 # There are 7 bytes pending
123
conn._range_warning_thresold = 6 # There are 7 bytes pending
98
124
conn.cleanup_pipe()
99
125
self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
106
132
# which offsets are easy to calculate for test writers. It's used as a
107
133
# building block with slight variations but basically 'a' is the first char
108
134
# of the range and 'z' is the last.
109
alpha = 'abcdefghijklmnopqrstuvwxyz'
135
alpha = b'abcdefghijklmnopqrstuvwxyz'
111
137
def test_can_read_at_first_access(self):
112
138
"""Test that the just created file can be read."""
113
self.assertEquals(self.alpha, self._file.read())
139
self.assertEqual(self.alpha, self._file.read())
115
141
def test_seek_read(self):
116
142
"""Test seek/read inside the range."""
118
144
start = self.first_range_start
119
145
# Before any use, tell() should be at the range start
120
self.assertEquals(start, f.tell())
121
cur = start # For an overall offset assertion
146
self.assertEqual(start, f.tell())
147
cur = start # For an overall offset assertion
122
148
f.seek(start + 3)
124
self.assertEquals('def', f.read(3))
150
self.assertEqual(b'def', f.read(3))
125
151
cur += len('def')
128
self.assertEquals('klmn', f.read(4))
154
self.assertEqual(b'klmn', f.read(4))
129
155
cur += len('klmn')
130
156
# read(0) in the middle of a range
131
self.assertEquals('', f.read(0))
157
self.assertEqual(b'', f.read(0))
135
self.assertEquals(here, f.tell())
136
self.assertEquals(cur, f.tell())
161
self.assertEqual(here, f.tell())
162
self.assertEqual(cur, f.tell())
138
164
def test_read_zero(self):
140
start = self.first_range_start
141
self.assertEquals('', f.read(0))
166
self.assertEqual(b'', f.read(0))
143
self.assertEquals('', f.read(0))
168
self.assertEqual(b'', f.read(0))
145
170
def test_seek_at_range_end(self):
149
174
def test_read_at_range_end(self):
150
175
"""Test read behaviour at range end."""
152
self.assertEquals(self.alpha, f.read())
153
self.assertEquals('', f.read(0))
177
self.assertEqual(self.alpha, f.read())
178
self.assertEqual(b'', f.read(0))
154
179
self.assertRaises(errors.InvalidRange, f.read, 1)
156
181
def test_unbounded_read_after_seek(self):
159
184
# Should not cross ranges
160
self.assertEquals('yz', f.read())
185
self.assertEqual(b'yz', f.read())
162
187
def test_seek_backwards(self):
183
208
self.assertRaises(errors.InvalidRange, f.read, 10)
185
210
def test_seek_from_end(self):
186
"""Test seeking from the end of the file.
188
The semantic is unclear in case of multiple ranges. Seeking from end
189
exists only for the http transports, cannot be used if the file size is
190
unknown and is not used in bzrlib itself. This test must be (and is)
191
overridden by daughter classes.
193
Reading from end makes sense only when a range has been requested from
194
the end of the file (see HttpTransportBase._get() when using the
195
'tail_amount' parameter). The HTTP response can only be a whole file or
200
self.assertEquals('yz', f.read())
211
"""Test seeking from the end of the file.
213
The semantic is unclear in case of multiple ranges. Seeking from end
214
exists only for the http transports, cannot be used if the file size is
215
unknown and is not used in breezy itself. This test must be (and is)
216
overridden by daughter classes.
218
Reading from end makes sense only when a range has been requested from
219
the end of the file (see HttpTransportBase._get() when using the
220
'tail_amount' parameter). The HTTP response can only be a whole file or
225
self.assertEqual(b'yz', f.read())
203
228
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
207
232
super(TestRangeFileSizeUnknown, self).setUp()
208
233
self._file = response.RangeFile('Whole_file_size_known',
209
StringIO(self.alpha))
210
235
# We define no range, relying on RangeFile to provide default values
211
self.first_range_start = 0 # It's the whole file
236
self.first_range_start = 0 # It's the whole file
213
238
def test_seek_from_end(self):
214
239
"""See TestRangeFileMixin.test_seek_from_end.
220
245
def test_read_at_range_end(self):
221
246
"""Test read behaviour at range end."""
223
self.assertEquals(self.alpha, f.read())
224
self.assertEquals('', f.read(0))
225
self.assertEquals('', f.read(1))
248
self.assertEqual(self.alpha, f.read())
249
self.assertEqual(b'', f.read(0))
250
self.assertEqual(b'', f.read(1))
228
253
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
232
257
super(TestRangeFileSizeKnown, self).setUp()
233
258
self._file = response.RangeFile('Whole_file_size_known',
234
StringIO(self.alpha))
235
260
self._file.set_range(0, len(self.alpha))
236
self.first_range_start = 0 # It's the whole file
261
self.first_range_start = 0 # It's the whole file
239
264
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
243
268
super(TestRangeFileSingleRange, self).setUp()
244
269
self._file = response.RangeFile('Single_range_file',
245
StringIO(self.alpha))
246
271
self.first_range_start = 15
247
272
self._file.set_range(self.first_range_start, len(self.alpha))
250
274
def test_read_before_range(self):
251
275
# This can't occur under normal circumstances, we have to force it
253
f._pos = 0 # Force an invalid pos
277
f._pos = 0 # Force an invalid pos
254
278
self.assertRaises(errors.InvalidRange, f.read, 2)
271
295
# in HTTP response headers and the boundary lines that separate
272
296
# multipart content.
274
boundary = "separation"
298
boundary = b"separation"
277
301
super(TestRangeFileMultipleRanges, self).setUp()
279
303
boundary = self.boundary
282
306
self.first_range_start = 25
283
file_size = 200 # big enough to encompass all ranges
307
file_size = 200 # big enough to encompass all ranges
284
308
for (start, part) in [(self.first_range_start, self.alpha),
285
309
# Two contiguous ranges
286
310
(100, self.alpha),
291
315
content += self._boundary_line()
293
317
self._file = response.RangeFile('Multiple_ranges_file',
295
319
self.set_file_boundary()
297
321
def _boundary_line(self):
298
322
"""Helper to build the formatted boundary line."""
299
return '--' + self.boundary + '\r\n'
323
return b'--' + self.boundary + b'\r\n'
301
325
def set_file_boundary(self):
302
326
# Ranges are set by decoding the range headers, the RangeFile user is
305
329
# which is part of the Content-Type header).
306
330
self._file.set_boundary(self.boundary)
308
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
332
def _multipart_byterange(self, data, offset, boundary, file_size=b'*'):
309
333
"""Encode a part of a file as a multipart/byterange MIME type.
311
335
When a range request is issued, the HTTP response body can be
327
351
# A range is described by a set of headers, but only 'Content-Range' is
328
352
# required for our implementation (TestHandleResponse below will
329
353
# exercise ranges with multiple or missing headers')
330
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
354
if isinstance(file_size, int):
355
file_size = b'%d' % file_size
356
range += b'Content-Range: bytes %d-%d/%s\r\n' % (offset,
334
361
# Finally the raw bytes
338
365
def test_read_all_ranges(self):
340
self.assertEquals(self.alpha, f.read()) # Read first range
341
f.seek(100) # Trigger the second range recognition
342
self.assertEquals(self.alpha, f.read()) # Read second range
343
self.assertEquals(126, f.tell())
344
f.seek(126) # Start of third range which is also the current pos !
345
self.assertEquals('A', f.read(1))
367
self.assertEqual(self.alpha, f.read()) # Read first range
368
f.seek(100) # Trigger the second range recognition
369
self.assertEqual(self.alpha, f.read()) # Read second range
370
self.assertEqual(126, f.tell())
371
f.seek(126) # Start of third range which is also the current pos !
372
self.assertEqual(b'A', f.read(1))
347
self.assertEquals('LMN', f.read(3))
374
self.assertEqual(b'LMN', f.read(3))
349
376
def test_seek_from_end(self):
350
377
"""See TestRangeFileMixin.test_seek_from_end."""
372
399
def test_seek_across_ranges(self):
374
start = self.first_range_start
375
f.seek(126) # skip the two first ranges
376
self.assertEquals('AB', f.read(2))
401
f.seek(126) # skip the two first ranges
402
self.assertEqual(b'AB', f.read(2))
378
404
def test_checked_read_dont_overflow_buffers(self):
380
start = self.first_range_start
381
406
# We force a very low value to exercise all code paths in _checked_read
382
407
f._discarded_buf_size = 8
383
f.seek(126) # skip the two first ranges
384
self.assertEquals('AB', f.read(2))
408
f.seek(126) # skip the two first ranges
409
self.assertEqual(b'AB', f.read(2))
386
411
def test_seek_twice_between_ranges(self):
388
413
start = self.first_range_start
389
f.seek(start + 40) # Past the first range but before the second
414
f.seek(start + 40) # Past the first range but before the second
390
415
# Now the file is positioned at the second range start (100)
391
416
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
400
425
def test_read_at_range_end(self):
402
self.assertEquals(self.alpha, f.read())
403
self.assertEquals(self.alpha, f.read())
404
self.assertEquals(self.alpha.upper(), f.read())
427
self.assertEqual(self.alpha, f.read())
428
self.assertEqual(self.alpha, f.read())
429
self.assertEqual(self.alpha.upper(), f.read())
405
430
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
423
448
# The boundary as it appears in boundary lines
424
449
# IIS 6 and 7 use this value
425
_boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
426
boundary = '<' + _boundary_trimmed + '>'
450
_boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
451
boundary = b'<' + _boundary_trimmed + b'>'
428
453
def set_file_boundary(self):
429
454
# Emulate broken rfc822.unquote() here by removing angles
445
470
def test_range_syntax(self):
446
471
"""Test the Content-Range scanning."""
448
f = response.RangeFile('foo', StringIO())
473
f = response.RangeFile('foo', BytesIO())
450
475
def ok(expected, header_value):
451
476
f.set_range_from_header(header_value)
452
477
# Slightly peek under the covers to get the size
453
self.assertEquals(expected, (f.tell(), f._size))
478
self.assertEqual(expected, (f.tell(), f._size))
455
480
ok((1, 10), 'bytes 1-10/11')
456
481
ok((1, 10), 'bytes 1-10/*')
457
482
ok((12, 2), '\tbytes 12-13/*')
458
483
ok((28, 1), ' bytes 28-28/*')
459
484
ok((2123, 2120), 'bytes 2123-4242/12310')
460
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
485
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
462
487
def nok(header_value):
463
488
self.assertRaises(errors.InvalidHttpRange,
474
499
# Taken from real request responses
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
500
_full_text_response = (200, b"""HTTP/1.1 200 OK\r
476
501
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
477
502
Server: Apache/2.0.54 (Fedora)\r
478
503
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
482
507
Connection: close\r
483
508
Content-Type: text/plain; charset=UTF-8\r
485
""", """Bazaar-NG meta directory, format 1
510
""", b"""Bazaar-NG meta directory, format 1
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
514
_single_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
490
515
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
516
Server: Apache/2.0.54 (Fedora)\r
492
517
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
497
522
Connection: close\r
498
523
Content-Type: text/plain; charset=UTF-8\r
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
525
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
501
526
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
529
_single_range_no_content_type = (206, b"""HTTP/1.1 206 Partial Content\r
505
530
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
506
531
Server: Apache/2.0.54 (Fedora)\r
507
532
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
511
536
Content-Range: bytes 100-199/93890\r
512
537
Connection: close\r
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
539
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
540
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
543
_multipart_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
519
544
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
520
545
Server: Apache/2.0.54 (Fedora)\r
521
546
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
607
632
Content-Length: 35\r
608
633
Connection: close\r
610
""", """Bazaar-NG meta directory, format 1
635
""", b"""Bazaar-NG meta directory, format 1
614
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
639
_full_text_response_no_content_length = (200, b"""HTTP/1.1 200 OK\r
615
640
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
616
641
Server: Apache/2.0.54 (Fedora)\r
617
642
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
620
645
Connection: close\r
621
646
Content-Type: text/plain; charset=UTF-8\r
623
""", """Bazaar-NG meta directory, format 1
648
""", b"""Bazaar-NG meta directory, format 1
627
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
652
_single_range_no_content_range = (206, b"""HTTP/1.1 206 Partial Content\r
628
653
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
629
654
Server: Apache/2.0.54 (Fedora)\r
630
655
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
633
658
Content-Length: 100\r
634
659
Connection: close\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
661
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
637
662
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
640
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
665
_single_range_response_truncated = (206, b"""HTTP/1.1 206 Partial Content\r
641
666
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
642
667
Server: Apache/2.0.54 (Fedora)\r
643
668
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
648
673
Connection: close\r
649
674
Content-Type: text/plain; charset=UTF-8\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
654
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
676
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
679
_invalid_response = (444, b"""HTTP/1.1 444 Bad Response\r
655
680
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
656
681
Connection: close\r
657
682
Content-Type: text/html; charset=iso-8859-1\r
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
684
""", b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
661
686
<title>404 Not Found</title>
701
726
class TestHandleResponse(tests.TestCase):
703
728
def _build_HTTPMessage(self, raw_headers):
704
status_and_headers = StringIO(raw_headers)
729
status_and_headers = BytesIO(raw_headers)
705
730
# Get rid of the status line
706
731
status_and_headers.readline()
707
msg = httplib.HTTPMessage(status_and_headers)
732
msg = parse_headers(status_and_headers)
710
738
def get_response(self, a_response):
711
739
"""Process a supplied response, and return the result."""
712
740
code, raw_headers, body = a_response
713
msg = self._build_HTTPMessage(raw_headers)
714
return response.handle_response('http://foo', code, msg,
715
StringIO(a_response[2]))
741
getheader = self._build_HTTPMessage(raw_headers)
742
return response.handle_response(
743
'http://foo', code, getheader, BytesIO(a_response[2]))
717
745
def test_full_text(self):
718
746
out = self.get_response(_full_text_response)
719
# It is a StringIO from the original data
747
# It is a BytesIO from the original data
720
748
self.assertEqual(_full_text_response[2], out.read())
722
750
def test_single_range(self):
763
791
def test_full_text_no_content_type(self):
764
792
# We should not require Content-Type for a full response
765
793
code, raw_headers, body = _full_text_response_no_content_type
766
msg = self._build_HTTPMessage(raw_headers)
767
out = response.handle_response('http://foo', code, msg, StringIO(body))
794
getheader = self._build_HTTPMessage(raw_headers)
795
out = response.handle_response(
796
'http://foo', code, getheader, BytesIO(body))
768
797
self.assertEqual(body, out.read())
770
799
def test_full_text_no_content_length(self):
771
800
code, raw_headers, body = _full_text_response_no_content_length
772
msg = self._build_HTTPMessage(raw_headers)
773
out = response.handle_response('http://foo', code, msg, StringIO(body))
801
getheader = self._build_HTTPMessage(raw_headers)
802
out = response.handle_response(
803
'http://foo', code, getheader, BytesIO(body))
774
804
self.assertEqual(body, out.read())
776
806
def test_missing_content_range(self):
777
807
code, raw_headers, body = _single_range_no_content_range
778
msg = self._build_HTTPMessage(raw_headers)
808
getheader = self._build_HTTPMessage(raw_headers)
779
809
self.assertRaises(errors.InvalidHttpResponse,
780
810
response.handle_response,
781
'http://bogus', code, msg, StringIO(body))
811
'http://bogus', code, getheader, BytesIO(body))
783
813
def test_multipart_no_content_range(self):
784
814
code, raw_headers, body = _multipart_no_content_range
785
msg = self._build_HTTPMessage(raw_headers)
815
getheader = self._build_HTTPMessage(raw_headers)
786
816
self.assertRaises(errors.InvalidHttpResponse,
787
817
response.handle_response,
788
'http://bogus', code, msg, StringIO(body))
818
'http://bogus', code, getheader, BytesIO(body))
790
820
def test_multipart_no_boundary(self):
791
821
out = self.get_response(_multipart_no_boundary)
803
tests.TestCase.setUp(self)
833
super(TestRangeFileSizeReadLimited, self).setUp()
804
834
# create a test datablock larger than _max_read_size.
805
835
chunk_size = response.RangeFile._max_read_size
806
test_pattern = '0123456789ABCDEF'
807
self.test_data = test_pattern * (3 * chunk_size / len(test_pattern))
836
test_pattern = b'0123456789ABCDEF'
837
self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
808
838
self.test_data_len = len(self.test_data)
810
840
def test_max_read_size(self):