37
37
InvalidHttpResponse.
41
import http.client as http_client
42
except ImportError: # python < 3 without future
43
import httplib as http_client
46
parse_headers = http_client.parse_headers
47
except AttributeError: # python 2
48
parse_headers = http_client.HTTPMessage
40
from cStringIO import StringIO
54
from ..sixish import (
58
from ..transport.http import (
47
from bzrlib.transport.http import (
62
from .file_utils import (
51
from bzrlib.tests.file_utils import (
68
57
"""A socket-like object that can be given a predefined content."""
70
59
def __init__(self, data):
71
self.readfile = BytesIO(data)
60
self.readfile = StringIO(data)
73
62
def makefile(self, mode='r', bufsize=None):
74
63
return self.readfile
77
class FakeHTTPConnection(HTTPConnection):
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
79
68
def __init__(self, sock):
80
HTTPConnection.__init__(self, 'localhost')
69
_urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
81
70
# Set the socket to bypass the connection
89
class TestResponseFileIter(tests.TestCase):
91
def test_iter_empty(self):
92
f = response.ResponseFile('empty', BytesIO())
93
self.assertEqual([], list(f))
95
def test_iter_many(self):
96
f = response.ResponseFile('many', BytesIO(b'0\n1\nboo!\n'))
97
self.assertEqual([b'0\n', b'1\n', b'boo!\n'], list(f))
100
78
class TestHTTPConnection(tests.TestCase):
102
80
def test_cleanup_pipe(self):
103
sock = ReadSocket(b"""HTTP/1.1 200 OK\r
81
sock = ReadSocket("""HTTP/1.1 200 OK\r
104
82
Content-Type: text/plain; charset=UTF-8\r
105
83
Content-Length: 18
114
92
# Now, get the response
115
93
resp = conn.getresponse()
116
94
# Read part of the response
117
self.assertEqual(b'0123456789\n', resp.read(11))
95
self.assertEquals('0123456789\n', resp.read(11))
118
96
# Override the thresold to force the warning emission
119
conn._range_warning_thresold = 6 # There are 7 bytes pending
97
conn._range_warning_thresold = 6 # There are 7 bytes pending
120
98
conn.cleanup_pipe()
121
99
self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
128
106
# which offsets are easy to calculate for test writers. It's used as a
129
107
# building block with slight variations but basically 'a' is the first char
130
108
# of the range and 'z' is the last.
131
alpha = b'abcdefghijklmnopqrstuvwxyz'
109
alpha = 'abcdefghijklmnopqrstuvwxyz'
133
111
def test_can_read_at_first_access(self):
134
112
"""Test that the just created file can be read."""
135
self.assertEqual(self.alpha, self._file.read())
113
self.assertEquals(self.alpha, self._file.read())
137
115
def test_seek_read(self):
138
116
"""Test seek/read inside the range."""
140
118
start = self.first_range_start
141
119
# Before any use, tell() should be at the range start
142
self.assertEqual(start, f.tell())
143
cur = start # For an overall offset assertion
120
self.assertEquals(start, f.tell())
121
cur = start # For an overall offset assertion
144
122
f.seek(start + 3)
146
self.assertEqual(b'def', f.read(3))
124
self.assertEquals('def', f.read(3))
147
125
cur += len('def')
150
self.assertEqual(b'klmn', f.read(4))
128
self.assertEquals('klmn', f.read(4))
151
129
cur += len('klmn')
152
130
# read(0) in the middle of a range
153
self.assertEqual(b'', f.read(0))
131
self.assertEquals('', f.read(0))
157
self.assertEqual(here, f.tell())
158
self.assertEqual(cur, f.tell())
135
self.assertEquals(here, f.tell())
136
self.assertEquals(cur, f.tell())
160
138
def test_read_zero(self):
162
self.assertEqual(b'', f.read(0))
140
start = self.first_range_start
141
self.assertEquals('', f.read(0))
164
self.assertEqual(b'', f.read(0))
143
self.assertEquals('', f.read(0))
166
145
def test_seek_at_range_end(self):
170
149
def test_read_at_range_end(self):
171
150
"""Test read behaviour at range end."""
173
self.assertEqual(self.alpha, f.read())
174
self.assertEqual(b'', f.read(0))
152
self.assertEquals(self.alpha, f.read())
153
self.assertEquals('', f.read(0))
175
154
self.assertRaises(errors.InvalidRange, f.read, 1)
177
156
def test_unbounded_read_after_seek(self):
180
159
# Should not cross ranges
181
self.assertEqual(b'yz', f.read())
160
self.assertEquals('yz', f.read())
183
162
def test_seek_backwards(self):
204
183
self.assertRaises(errors.InvalidRange, f.read, 10)
206
185
def test_seek_from_end(self):
207
"""Test seeking from the end of the file.
209
The semantic is unclear in case of multiple ranges. Seeking from end
210
exists only for the http transports, cannot be used if the file size is
211
unknown and is not used in breezy itself. This test must be (and is)
212
overridden by daughter classes.
214
Reading from end makes sense only when a range has been requested from
215
the end of the file (see HttpTransportBase._get() when using the
216
'tail_amount' parameter). The HTTP response can only be a whole file or
221
self.assertEqual(b'yz', f.read())
186
"""Test seeking from the end of the file.
188
The semantic is unclear in case of multiple ranges. Seeking from end
189
exists only for the http transports, cannot be used if the file size is
190
unknown and is not used in bzrlib itself. This test must be (and is)
191
overridden by daughter classes.
193
Reading from end makes sense only when a range has been requested from
194
the end of the file (see HttpTransportBase._get() when using the
195
'tail_amount' parameter). The HTTP response can only be a whole file or
200
self.assertEquals('yz', f.read())
224
203
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
228
207
super(TestRangeFileSizeUnknown, self).setUp()
229
208
self._file = response.RangeFile('Whole_file_size_known',
209
StringIO(self.alpha))
231
210
# We define no range, relying on RangeFile to provide default values
232
self.first_range_start = 0 # It's the whole file
211
self.first_range_start = 0 # It's the whole file
234
213
def test_seek_from_end(self):
235
214
"""See TestRangeFileMixin.test_seek_from_end.
241
220
def test_read_at_range_end(self):
242
221
"""Test read behaviour at range end."""
244
self.assertEqual(self.alpha, f.read())
245
self.assertEqual(b'', f.read(0))
246
self.assertEqual(b'', f.read(1))
223
self.assertEquals(self.alpha, f.read())
224
self.assertEquals('', f.read(0))
225
self.assertEquals('', f.read(1))
249
228
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
253
232
super(TestRangeFileSizeKnown, self).setUp()
254
233
self._file = response.RangeFile('Whole_file_size_known',
234
StringIO(self.alpha))
256
235
self._file.set_range(0, len(self.alpha))
257
self.first_range_start = 0 # It's the whole file
236
self.first_range_start = 0 # It's the whole file
260
239
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
264
243
super(TestRangeFileSingleRange, self).setUp()
265
244
self._file = response.RangeFile('Single_range_file',
245
StringIO(self.alpha))
267
246
self.first_range_start = 15
268
247
self._file.set_range(self.first_range_start, len(self.alpha))
270
250
def test_read_before_range(self):
271
251
# This can't occur under normal circumstances, we have to force it
273
f._pos = 0 # Force an invalid pos
253
f._pos = 0 # Force an invalid pos
274
254
self.assertRaises(errors.InvalidRange, f.read, 2)
291
271
# in HTTP response headers and the boundary lines that separate
292
272
# multipart content.
294
boundary = b"separation"
274
boundary = "separation"
297
277
super(TestRangeFileMultipleRanges, self).setUp()
299
279
boundary = self.boundary
302
282
self.first_range_start = 25
303
file_size = 200 # big enough to encompass all ranges
283
file_size = 200 # big enough to encompass all ranges
304
284
for (start, part) in [(self.first_range_start, self.alpha),
305
285
# Two contiguous ranges
306
286
(100, self.alpha),
311
291
content += self._boundary_line()
313
293
self._file = response.RangeFile('Multiple_ranges_file',
315
295
self.set_file_boundary()
317
297
def _boundary_line(self):
318
298
"""Helper to build the formatted boundary line."""
319
return b'--' + self.boundary + b'\r\n'
299
return '--' + self.boundary + '\r\n'
321
301
def set_file_boundary(self):
322
302
# Ranges are set by decoding the range headers, the RangeFile user is
325
305
# which is part of the Content-Type header).
326
306
self._file.set_boundary(self.boundary)
328
def _multipart_byterange(self, data, offset, boundary, file_size=b'*'):
308
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
329
309
"""Encode a part of a file as a multipart/byterange MIME type.
331
311
When a range request is issued, the HTTP response body can be
347
327
# A range is described by a set of headers, but only 'Content-Range' is
348
328
# required for our implementation (TestHandleResponse below will
349
329
# exercise ranges with multiple or missing headers')
350
if isinstance(file_size, int):
351
file_size = b'%d' % file_size
352
range += b'Content-Range: bytes %d-%d/%s\r\n' % (offset,
330
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
357
334
# Finally the raw bytes
361
338
def test_read_all_ranges(self):
363
self.assertEqual(self.alpha, f.read()) # Read first range
364
f.seek(100) # Trigger the second range recognition
365
self.assertEqual(self.alpha, f.read()) # Read second range
366
self.assertEqual(126, f.tell())
367
f.seek(126) # Start of third range which is also the current pos !
368
self.assertEqual(b'A', f.read(1))
340
self.assertEquals(self.alpha, f.read()) # Read first range
341
f.seek(100) # Trigger the second range recognition
342
self.assertEquals(self.alpha, f.read()) # Read second range
343
self.assertEquals(126, f.tell())
344
f.seek(126) # Start of third range which is also the current pos !
345
self.assertEquals('A', f.read(1))
370
self.assertEqual(b'LMN', f.read(3))
347
self.assertEquals('LMN', f.read(3))
372
349
def test_seek_from_end(self):
373
350
"""See TestRangeFileMixin.test_seek_from_end."""
395
372
def test_seek_across_ranges(self):
397
f.seek(126) # skip the two first ranges
398
self.assertEqual(b'AB', f.read(2))
374
start = self.first_range_start
375
f.seek(126) # skip the two first ranges
376
self.assertEquals('AB', f.read(2))
400
378
def test_checked_read_dont_overflow_buffers(self):
380
start = self.first_range_start
402
381
# We force a very low value to exercise all code paths in _checked_read
403
382
f._discarded_buf_size = 8
404
f.seek(126) # skip the two first ranges
405
self.assertEqual(b'AB', f.read(2))
383
f.seek(126) # skip the two first ranges
384
self.assertEquals('AB', f.read(2))
407
386
def test_seek_twice_between_ranges(self):
409
388
start = self.first_range_start
410
f.seek(start + 40) # Past the first range but before the second
389
f.seek(start + 40) # Past the first range but before the second
411
390
# Now the file is positioned at the second range start (100)
412
391
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
421
400
def test_read_at_range_end(self):
423
self.assertEqual(self.alpha, f.read())
424
self.assertEqual(self.alpha, f.read())
425
self.assertEqual(self.alpha.upper(), f.read())
402
self.assertEquals(self.alpha, f.read())
403
self.assertEquals(self.alpha, f.read())
404
self.assertEquals(self.alpha.upper(), f.read())
426
405
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
444
423
# The boundary as it appears in boundary lines
445
424
# IIS 6 and 7 use this value
446
_boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
447
boundary = b'<' + _boundary_trimmed + b'>'
425
_boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
426
boundary = '<' + _boundary_trimmed + '>'
449
428
def set_file_boundary(self):
450
429
# Emulate broken rfc822.unquote() here by removing angles
466
445
def test_range_syntax(self):
467
446
"""Test the Content-Range scanning."""
469
f = response.RangeFile('foo', BytesIO())
448
f = response.RangeFile('foo', StringIO())
471
450
def ok(expected, header_value):
472
451
f.set_range_from_header(header_value)
473
452
# Slightly peek under the covers to get the size
474
self.assertEqual(expected, (f.tell(), f._size))
453
self.assertEquals(expected, (f.tell(), f._size))
476
455
ok((1, 10), 'bytes 1-10/11')
477
456
ok((1, 10), 'bytes 1-10/*')
478
457
ok((12, 2), '\tbytes 12-13/*')
479
458
ok((28, 1), ' bytes 28-28/*')
480
459
ok((2123, 2120), 'bytes 2123-4242/12310')
481
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
460
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
483
462
def nok(header_value):
484
463
self.assertRaises(errors.InvalidHttpRange,
495
474
# Taken from real request responses
496
_full_text_response = (200, b"""HTTP/1.1 200 OK\r
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
497
476
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
498
477
Server: Apache/2.0.54 (Fedora)\r
499
478
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
503
482
Connection: close\r
504
483
Content-Type: text/plain; charset=UTF-8\r
506
""", b"""Bazaar-NG meta directory, format 1
485
""", """Bazaar-NG meta directory, format 1
510
_single_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
511
490
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
512
491
Server: Apache/2.0.54 (Fedora)\r
513
492
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
518
497
Connection: close\r
519
498
Content-Type: text/plain; charset=UTF-8\r
521
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
522
501
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
525
_single_range_no_content_type = (206, b"""HTTP/1.1 206 Partial Content\r
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
526
505
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
527
506
Server: Apache/2.0.54 (Fedora)\r
528
507
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
532
511
Content-Range: bytes 100-199/93890\r
533
512
Connection: close\r
535
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
536
515
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
539
_multipart_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
540
519
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
541
520
Server: Apache/2.0.54 (Fedora)\r
542
521
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
628
607
Content-Length: 35\r
629
608
Connection: close\r
631
""", b"""Bazaar-NG meta directory, format 1
610
""", """Bazaar-NG meta directory, format 1
635
_full_text_response_no_content_length = (200, b"""HTTP/1.1 200 OK\r
614
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
636
615
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
637
616
Server: Apache/2.0.54 (Fedora)\r
638
617
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
641
620
Connection: close\r
642
621
Content-Type: text/plain; charset=UTF-8\r
644
""", b"""Bazaar-NG meta directory, format 1
623
""", """Bazaar-NG meta directory, format 1
648
_single_range_no_content_range = (206, b"""HTTP/1.1 206 Partial Content\r
627
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
649
628
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
650
629
Server: Apache/2.0.54 (Fedora)\r
651
630
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
654
633
Content-Length: 100\r
655
634
Connection: close\r
657
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
658
637
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
661
_single_range_response_truncated = (206, b"""HTTP/1.1 206 Partial Content\r
640
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
662
641
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
663
642
Server: Apache/2.0.54 (Fedora)\r
664
643
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
669
648
Connection: close\r
670
649
Content-Type: text/plain; charset=UTF-8\r
672
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
675
_invalid_response = (444, b"""HTTP/1.1 444 Bad Response\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
654
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
676
655
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
677
656
Connection: close\r
678
657
Content-Type: text/html; charset=iso-8859-1\r
680
""", b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
682
661
<title>404 Not Found</title>
722
701
class TestHandleResponse(tests.TestCase):
724
703
def _build_HTTPMessage(self, raw_headers):
725
status_and_headers = BytesIO(raw_headers)
704
status_and_headers = StringIO(raw_headers)
726
705
# Get rid of the status line
727
706
status_and_headers.readline()
728
msg = parse_headers(status_and_headers)
707
msg = httplib.HTTPMessage(status_and_headers)
734
710
def get_response(self, a_response):
735
711
"""Process a supplied response, and return the result."""
736
712
code, raw_headers, body = a_response
737
getheader = self._build_HTTPMessage(raw_headers)
738
return response.handle_response(
739
'http://foo', code, getheader, BytesIO(a_response[2]))
713
msg = self._build_HTTPMessage(raw_headers)
714
return response.handle_response('http://foo', code, msg,
715
StringIO(a_response[2]))
741
717
def test_full_text(self):
742
718
out = self.get_response(_full_text_response)
743
# It is a BytesIO from the original data
719
# It is a StringIO from the original data
744
720
self.assertEqual(_full_text_response[2], out.read())
746
722
def test_single_range(self):
787
763
def test_full_text_no_content_type(self):
788
764
# We should not require Content-Type for a full response
789
765
code, raw_headers, body = _full_text_response_no_content_type
790
getheader = self._build_HTTPMessage(raw_headers)
791
out = response.handle_response(
792
'http://foo', code, getheader, BytesIO(body))
766
msg = self._build_HTTPMessage(raw_headers)
767
out = response.handle_response('http://foo', code, msg, StringIO(body))
793
768
self.assertEqual(body, out.read())
795
770
def test_full_text_no_content_length(self):
796
771
code, raw_headers, body = _full_text_response_no_content_length
797
getheader = self._build_HTTPMessage(raw_headers)
798
out = response.handle_response(
799
'http://foo', code, getheader, BytesIO(body))
772
msg = self._build_HTTPMessage(raw_headers)
773
out = response.handle_response('http://foo', code, msg, StringIO(body))
800
774
self.assertEqual(body, out.read())
802
776
def test_missing_content_range(self):
803
777
code, raw_headers, body = _single_range_no_content_range
804
getheader = self._build_HTTPMessage(raw_headers)
778
msg = self._build_HTTPMessage(raw_headers)
805
779
self.assertRaises(errors.InvalidHttpResponse,
806
780
response.handle_response,
807
'http://bogus', code, getheader, BytesIO(body))
781
'http://bogus', code, msg, StringIO(body))
809
783
def test_multipart_no_content_range(self):
810
784
code, raw_headers, body = _multipart_no_content_range
811
getheader = self._build_HTTPMessage(raw_headers)
785
msg = self._build_HTTPMessage(raw_headers)
812
786
self.assertRaises(errors.InvalidHttpResponse,
813
787
response.handle_response,
814
'http://bogus', code, getheader, BytesIO(body))
788
'http://bogus', code, msg, StringIO(body))
816
790
def test_multipart_no_boundary(self):
817
791
out = self.get_response(_multipart_no_boundary)
829
super(TestRangeFileSizeReadLimited, self).setUp()
803
tests.TestCase.setUp(self)
830
804
# create a test datablock larger than _max_read_size.
831
805
chunk_size = response.RangeFile._max_read_size
832
test_pattern = b'0123456789ABCDEF'
833
self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
806
test_pattern = '0123456789ABCDEF'
807
self.test_data = test_pattern * (3 * chunk_size / len(test_pattern))
834
808
self.test_data_len = len(self.test_data)
836
810
def test_max_read_size(self):