1
# Copyright (C) 2006-2010 Canonical Ltd
1
# Copyright (C) 2006-2010, 2012, 2013, 2016 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
37
37
InvalidHttpResponse.
40
from cStringIO import StringIO
41
import http.client as http_client
42
parse_headers = http_client.parse_headers
43
except ImportError: # python < 3
44
import httplib as http_client
45
parse_headers = http_client.HTTPMessage
47
from bzrlib.transport.http import (
51
from ..sixish import (
54
from ..transport.http import (
51
from bzrlib.tests.file_utils import (
58
from .file_utils import (
57
64
"""A socket-like object that can be given a predefined content."""
59
66
def __init__(self, data):
    """Store `data` as the content the fake socket will serve.

    :param data: The raw bytes to expose through ``self.readfile``.
    """
    # Only the BytesIO stream is kept: the StringIO assignment that used to
    # precede it was overwritten immediately and never observable.
    self.readfile = BytesIO(data)
62
69
def makefile(self, mode='r', bufsize=None):
    """Return the stream held in ``self.readfile``.

    `mode` and `bufsize` are accepted but not used: the same stream object
    is returned whatever their values.
    """
    return self.readfile
85
class TestResponseFileIter(tests.TestCase):
    """Check what iterating over a response.ResponseFile yields."""

    def test_iter_empty(self):
        """Iterating a ResponseFile built on an empty stream yields nothing."""
        f = response.ResponseFile('empty', BytesIO())
        self.assertEqual([], list(f))

    def test_iter_many(self):
        """Iteration yields each line, with its line terminator kept."""
        f = response.ResponseFile('many', BytesIO(b'0\n1\nboo!\n'))
        self.assertEqual([b'0\n', b'1\n', b'boo!\n'], list(f))
78
96
class TestHTTPConnection(tests.TestCase):
80
98
def test_cleanup_pipe(self):
81
sock = ReadSocket("""HTTP/1.1 200 OK\r
99
sock = ReadSocket(b"""HTTP/1.1 200 OK\r
82
100
Content-Type: text/plain; charset=UTF-8\r
83
101
Content-Length: 18
92
110
# Now, get the response
93
111
resp = conn.getresponse()
94
112
# Read part of the response
95
self.assertEquals('0123456789\n', resp.read(11))
113
self.assertEqual(b'0123456789\n', resp.read(11))
96
114
# Override the threshold to force the warning emission
97
conn._range_warning_thresold = 6 # There are 7 bytes pending
115
conn._range_warning_thresold = 6 # There are 7 bytes pending
98
116
conn.cleanup_pipe()
99
117
self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
106
124
# which offsets are easy to calculate for test writers. It's used as a
107
125
# building block with slight variations but basically 'a' is the first char
108
126
# of the range and 'z' is the last.
109
alpha = 'abcdefghijklmnopqrstuvwxyz'
127
alpha = b'abcdefghijklmnopqrstuvwxyz'
111
129
def test_can_read_at_first_access(self):
    """Test that the just created file can be read."""
    # A single assertion: the duplicated line called the deprecated
    # assertEquals alias and consumed the file with a first read().
    self.assertEqual(self.alpha, self._file.read())
115
133
def test_seek_read(self):
116
134
"""Test seek/read inside the range."""
118
136
start = self.first_range_start
119
137
# Before any use, tell() should be at the range start
120
self.assertEquals(start, f.tell())
121
cur = start # For an overall offset assertion
138
self.assertEqual(start, f.tell())
139
cur = start # For an overall offset assertion
122
140
f.seek(start + 3)
124
self.assertEquals('def', f.read(3))
142
self.assertEqual(b'def', f.read(3))
125
143
cur += len('def')
128
self.assertEquals('klmn', f.read(4))
146
self.assertEqual(b'klmn', f.read(4))
129
147
cur += len('klmn')
130
148
# read(0) in the middle of a range
131
self.assertEquals('', f.read(0))
149
self.assertEqual(b'', f.read(0))
135
self.assertEquals(here, f.tell())
136
self.assertEquals(cur, f.tell())
153
self.assertEqual(here, f.tell())
154
self.assertEqual(cur, f.tell())
138
156
def test_read_zero(self):
140
start = self.first_range_start
141
self.assertEquals('', f.read(0))
158
self.assertEqual(b'', f.read(0))
143
self.assertEquals('', f.read(0))
160
self.assertEqual(b'', f.read(0))
145
162
def test_seek_at_range_end(self):
149
166
def test_read_at_range_end(self):
150
167
"""Test read behaviour at range end."""
152
self.assertEquals(self.alpha, f.read())
153
self.assertEquals('', f.read(0))
169
self.assertEqual(self.alpha, f.read())
170
self.assertEqual(b'', f.read(0))
154
171
self.assertRaises(errors.InvalidRange, f.read, 1)
156
173
def test_unbounded_read_after_seek(self):
159
176
# Should not cross ranges
160
self.assertEquals('yz', f.read())
177
self.assertEqual(b'yz', f.read())
162
179
def test_seek_backwards(self):
183
200
self.assertRaises(errors.InvalidRange, f.read, 10)
185
202
def test_seek_from_end(self):
186
"""Test seeking from the end of the file.
188
The semantic is unclear in case of multiple ranges. Seeking from end
189
exists only for the http transports, cannot be used if the file size is
190
unknown and is not used in bzrlib itself. This test must be (and is)
191
overridden by daughter classes.
193
Reading from end makes sense only when a range has been requested from
194
the end of the file (see HttpTransportBase._get() when using the
195
'tail_amount' parameter). The HTTP response can only be a whole file or
200
self.assertEquals('yz', f.read())
203
"""Test seeking from the end of the file.
205
The semantic is unclear in case of multiple ranges. Seeking from end
206
exists only for the http transports, cannot be used if the file size is
207
unknown and is not used in breezy itself. This test must be (and is)
208
overridden by daughter classes.
210
Reading from end makes sense only when a range has been requested from
211
the end of the file (see HttpTransportBase._get() when using the
212
'tail_amount' parameter). The HTTP response can only be a whole file or
217
self.assertEqual(b'yz', f.read())
203
220
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
207
224
super(TestRangeFileSizeUnknown, self).setUp()
208
225
self._file = response.RangeFile('Whole_file_size_known',
209
StringIO(self.alpha))
210
227
# We define no range, relying on RangeFile to provide default values
211
self.first_range_start = 0 # It's the whole file
228
self.first_range_start = 0 # It's the whole file
213
230
def test_seek_from_end(self):
214
231
"""See TestRangeFileMixin.test_seek_from_end.
220
237
def test_read_at_range_end(self):
221
238
"""Test read behaviour at range end."""
223
self.assertEquals(self.alpha, f.read())
224
self.assertEquals('', f.read(0))
225
self.assertEquals('', f.read(1))
240
self.assertEqual(self.alpha, f.read())
241
self.assertEqual(b'', f.read(0))
242
self.assertEqual(b'', f.read(1))
228
245
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
232
249
super(TestRangeFileSizeKnown, self).setUp()
233
250
self._file = response.RangeFile('Whole_file_size_known',
234
StringIO(self.alpha))
235
252
self._file.set_range(0, len(self.alpha))
236
self.first_range_start = 0 # It's the whole file
253
self.first_range_start = 0 # It's the whole file
239
256
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
243
260
super(TestRangeFileSingleRange, self).setUp()
244
261
self._file = response.RangeFile('Single_range_file',
245
StringIO(self.alpha))
246
263
self.first_range_start = 15
247
264
self._file.set_range(self.first_range_start, len(self.alpha))
250
266
def test_read_before_range(self):
251
267
# This can't occur under normal circumstances, we have to force it
253
f._pos = 0 # Force an invalid pos
269
f._pos = 0 # Force an invalid pos
254
270
self.assertRaises(errors.InvalidRange, f.read, 2)
271
287
# in HTTP response headers and the boundary lines that separate
272
288
# multipart content.
274
boundary = "separation"
290
boundary = b"separation"
277
293
super(TestRangeFileMultipleRanges, self).setUp()
279
295
boundary = self.boundary
282
298
self.first_range_start = 25
283
file_size = 200 # big enough to encompass all ranges
299
file_size = 200 # big enough to encompass all ranges
284
300
for (start, part) in [(self.first_range_start, self.alpha),
285
301
# Two contiguous ranges
286
302
(100, self.alpha),
291
307
content += self._boundary_line()
293
309
self._file = response.RangeFile('Multiple_ranges_file',
295
311
self.set_file_boundary()
297
313
def _boundary_line(self):
    """Helper to build the formatted boundary line.

    :return: ``self.boundary`` prefixed with ``--`` and terminated by CRLF,
        as bytes.
    """
    # self.boundary is a bytes value, so the literals around it must be bytes
    # too; the str-based return that preceded this one could not work with it
    # and hid this line.
    return b'--' + self.boundary + b'\r\n'
301
317
def set_file_boundary(self):
302
318
# Ranges are set by decoding the range headers, the RangeFile user is
305
321
# which is part of the Content-Type header).
306
322
self._file.set_boundary(self.boundary)
308
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
324
def _multipart_byterange(self, data, offset, boundary, file_size=b'*'):
309
325
"""Encode a part of a file as a multipart/byterange MIME type.
311
327
When a range request is issued, the HTTP response body can be
327
343
# A range is described by a set of headers, but only 'Content-Range' is
328
344
# required for our implementation (TestHandleResponse below will
329
345
# exercise ranges with multiple or missing headers)
330
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
346
if isinstance(file_size, int):
347
file_size = b'%d' % file_size
348
range += b'Content-Range: bytes %d-%d/%s\r\n' % (offset,
334
353
# Finally the raw bytes
338
357
def test_read_all_ranges(self):
340
self.assertEquals(self.alpha, f.read()) # Read first range
341
f.seek(100) # Trigger the second range recognition
342
self.assertEquals(self.alpha, f.read()) # Read second range
343
self.assertEquals(126, f.tell())
344
f.seek(126) # Start of third range which is also the current pos !
345
self.assertEquals('A', f.read(1))
359
self.assertEqual(self.alpha, f.read()) # Read first range
360
f.seek(100) # Trigger the second range recognition
361
self.assertEqual(self.alpha, f.read()) # Read second range
362
self.assertEqual(126, f.tell())
363
f.seek(126) # Start of third range which is also the current pos !
364
self.assertEqual(b'A', f.read(1))
347
self.assertEquals('LMN', f.read(3))
366
self.assertEqual(b'LMN', f.read(3))
349
368
def test_seek_from_end(self):
350
369
"""See TestRangeFileMixin.test_seek_from_end."""
357
self.assertEquals('yz', f.read())
376
self.assertEqual(b'yz', f.read())
358
377
self.assertRaises(errors.InvalidRange, f.seek, -2, 2)
360
379
def test_seek_into_void(self):
372
391
def test_seek_across_ranges(self):
374
start = self.first_range_start
375
f.seek(126) # skip the two first ranges
376
self.assertEquals('AB', f.read(2))
393
f.seek(126) # skip the two first ranges
394
self.assertEqual(b'AB', f.read(2))
378
396
def test_checked_read_dont_overflow_buffers(self):
380
start = self.first_range_start
381
398
# We force a very low value to exercise all code paths in _checked_read
382
399
f._discarded_buf_size = 8
383
f.seek(126) # skip the two first ranges
384
self.assertEquals('AB', f.read(2))
400
f.seek(126) # skip the two first ranges
401
self.assertEqual(b'AB', f.read(2))
386
403
def test_seek_twice_between_ranges(self):
388
405
start = self.first_range_start
389
f.seek(start + 40) # Past the first range but before the second
406
f.seek(start + 40) # Past the first range but before the second
390
407
# Now the file is positioned at the second range start (100)
391
408
self.assertRaises(errors.InvalidRange, f.seek, start + 41)
400
417
def test_read_at_range_end(self):
402
self.assertEquals(self.alpha, f.read())
403
self.assertEquals(self.alpha, f.read())
404
self.assertEquals(self.alpha.upper(), f.read())
419
self.assertEqual(self.alpha, f.read())
420
self.assertEqual(self.alpha, f.read())
421
self.assertEqual(self.alpha.upper(), f.read())
405
422
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
423
440
# The boundary as it appears in boundary lines
424
441
# IIS 6 and 7 use this value
425
_boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
426
boundary = '<' + _boundary_trimmed + '>'
442
_boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
443
boundary = b'<' + _boundary_trimmed + b'>'
428
445
def set_file_boundary(self):
429
446
# Emulate broken rfc822.unquote() here by removing angles
445
462
def test_range_syntax(self):
446
463
"""Test the Content-Range scanning."""
448
f = response.RangeFile('foo', StringIO())
465
f = response.RangeFile('foo', BytesIO())
450
467
def ok(expected, header_value):
451
468
f.set_range_from_header(header_value)
452
469
# Slightly peek under the covers to get the size
453
self.assertEquals(expected, (f.tell(), f._size))
470
self.assertEqual(expected, (f.tell(), f._size))
455
472
ok((1, 10), 'bytes 1-10/11')
456
473
ok((1, 10), 'bytes 1-10/*')
457
474
ok((12, 2), '\tbytes 12-13/*')
458
475
ok((28, 1), ' bytes 28-28/*')
459
476
ok((2123, 2120), 'bytes 2123-4242/12310')
460
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
477
ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)
462
479
def nok(header_value):
463
480
self.assertRaises(errors.InvalidHttpRange,
474
491
# Taken from real request responses
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
492
_full_text_response = (200, b"""HTTP/1.1 200 OK\r
476
493
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
477
494
Server: Apache/2.0.54 (Fedora)\r
478
495
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
482
499
Connection: close\r
483
500
Content-Type: text/plain; charset=UTF-8\r
485
""", """Bazaar-NG meta directory, format 1
502
""", b"""Bazaar-NG meta directory, format 1
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
506
_single_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
490
507
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
508
Server: Apache/2.0.54 (Fedora)\r
492
509
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
497
514
Connection: close\r
498
515
Content-Type: text/plain; charset=UTF-8\r
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
517
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
501
518
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
521
_single_range_no_content_type = (206, b"""HTTP/1.1 206 Partial Content\r
505
522
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
506
523
Server: Apache/2.0.54 (Fedora)\r
507
524
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
511
528
Content-Range: bytes 100-199/93890\r
512
529
Connection: close\r
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
531
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
532
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
535
_multipart_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
519
536
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
520
537
Server: Apache/2.0.54 (Fedora)\r
521
538
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
525
542
Connection: close\r
526
543
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
528
\r""", """--418470f848b63279b\r
545
\r""", b"""--418470f848b63279b\r
529
546
Content-type: text/plain; charset=UTF-8\r
530
547
Content-range: bytes 0-254/93890\r
568
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
585
_multipart_squid_range_response = (206, b"""HTTP/1.0 206 Partial Content\r
569
586
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
570
587
Server: Apache/2.2.2 (Unix) DAV/2\r
571
588
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
600
617
# This is made up
601
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
618
_full_text_response_no_content_type = (200, b"""HTTP/1.1 200 OK\r
602
619
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
603
620
Server: Apache/2.0.54 (Fedora)\r
604
621
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
607
624
Content-Length: 35\r
608
625
Connection: close\r
610
""", """Bazaar-NG meta directory, format 1
627
""", b"""Bazaar-NG meta directory, format 1
614
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
631
_full_text_response_no_content_length = (200, b"""HTTP/1.1 200 OK\r
615
632
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
616
633
Server: Apache/2.0.54 (Fedora)\r
617
634
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
620
637
Connection: close\r
621
638
Content-Type: text/plain; charset=UTF-8\r
623
""", """Bazaar-NG meta directory, format 1
640
""", b"""Bazaar-NG meta directory, format 1
627
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
644
_single_range_no_content_range = (206, b"""HTTP/1.1 206 Partial Content\r
628
645
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
629
646
Server: Apache/2.0.54 (Fedora)\r
630
647
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
633
650
Content-Length: 100\r
634
651
Connection: close\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
653
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
637
654
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
640
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
657
_single_range_response_truncated = (206, b"""HTTP/1.1 206 Partial Content\r
641
658
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
642
659
Server: Apache/2.0.54 (Fedora)\r
643
660
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
648
665
Connection: close\r
649
666
Content-Type: text/plain; charset=UTF-8\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
654
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
668
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
671
_invalid_response = (444, b"""HTTP/1.1 444 Bad Response\r
655
672
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
656
673
Connection: close\r
657
674
Content-Type: text/html; charset=iso-8859-1\r
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
676
""", b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
661
678
<title>404 Not Found</title>
684
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
701
_multipart_no_boundary = (206, b"""HTTP/1.0 206 Partial Content\r
685
702
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
686
703
Content-Length: 598\r
690
707
--THIS_SEPARATES\r
691
708
Content-Type: text/plain\r
692
709
Content-Range: bytes 0-18/18672\r
701
718
class TestHandleResponse(tests.TestCase):
703
720
def _build_HTTPMessage(self, raw_headers):
704
status_and_headers = StringIO(raw_headers)
721
status_and_headers = BytesIO(raw_headers)
705
722
# Get rid of the status line
706
723
status_and_headers.readline()
707
msg = httplib.HTTPMessage(status_and_headers)
724
msg = parse_headers(status_and_headers)
710
727
def get_response(self, a_response):
712
729
code, raw_headers, body = a_response
713
730
msg = self._build_HTTPMessage(raw_headers)
714
731
return response.handle_response('http://foo', code, msg,
715
StringIO(a_response[2]))
732
BytesIO(a_response[2]))
717
734
def test_full_text(self):
    """Reading the handled full-text response gives back the canned body."""
    out = self.get_response(_full_text_response)
    # The third item of the canned response tuple is the body
    self.assertEqual(_full_text_response[2], out.read())
722
739
def test_single_range(self):
764
781
# We should not require Content-Type for a full response
765
782
code, raw_headers, body = _full_text_response_no_content_type
766
783
msg = self._build_HTTPMessage(raw_headers)
767
out = response.handle_response('http://foo', code, msg, StringIO(body))
784
out = response.handle_response('http://foo', code, msg, BytesIO(body))
768
785
self.assertEqual(body, out.read())
770
787
def test_full_text_no_content_length(self):
    """The _full_text_response_no_content_length body is read back whole."""
    code, raw_headers, body = _full_text_response_no_content_length
    msg = self._build_HTTPMessage(raw_headers)
    # The canned body is bytes, hence BytesIO; the StringIO call that preceded
    # this one was overwritten right away.
    out = response.handle_response('http://foo', code, msg, BytesIO(body))
    self.assertEqual(body, out.read())
776
793
def test_missing_content_range(self):
778
795
msg = self._build_HTTPMessage(raw_headers)
779
796
self.assertRaises(errors.InvalidHttpResponse,
780
797
response.handle_response,
781
'http://bogus', code, msg, StringIO(body))
798
'http://bogus', code, msg, BytesIO(body))
783
800
def test_multipart_no_content_range(self):
    """handle_response must reject the _multipart_no_content_range fixture."""
    code, raw_headers, body = _multipart_no_content_range
    msg = self._build_HTTPMessage(raw_headers)
    # A single argument list, with the bytes body served from a BytesIO
    self.assertRaises(errors.InvalidHttpResponse,
                      response.handle_response,
                      'http://bogus', code, msg, BytesIO(body))
790
807
def test_multipart_no_boundary(self):
791
808
out = self.get_response(_multipart_no_boundary)
803
tests.TestCase.setUp(self)
820
super(TestRangeFileSizeReadLimited, self).setUp()
804
821
# create a test datablock larger than _max_read_size.
805
822
chunk_size = response.RangeFile._max_read_size
806
test_pattern = '0123456789ABCDEF'
807
self.test_data = test_pattern * (3 * chunk_size / len(test_pattern))
823
test_pattern = b'0123456789ABCDEF'
824
self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
808
825
self.test_data_len = len(self.test_data)
810
827
def test_max_read_size(self):