1
# Copyright (C) 2006-2010 Canonical Ltd
1
# Copyright (C) 2006-2010, 2012, 2013, 2016 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
37
37
InvalidHttpResponse.
40
from cStringIO import StringIO
41
import http.client as http_client
42
except ImportError: # python < 3
43
import httplib as http_client
47
from bzrlib.transport.http import (
49
from ..sixish import (
52
from ..transport.http import (
51
from bzrlib.tests.file_utils import (
56
from .file_utils import (
57
62
"""A socket-like object that can be given a predefined content."""
59
64
def __init__(self, data):
60
self.readfile = StringIO(data)
65
self.readfile = BytesIO(data)
62
67
def makefile(self, mode='r', bufsize=None):
63
68
return self.readfile
83
class TestResponseFileIter(tests.TestCase):
    """Check the lines produced when iterating over a ResponseFile."""

    def test_iter_empty(self):
        """Iterating a ResponseFile built on an empty stream gives no lines."""
        empty_file = response.ResponseFile('empty', BytesIO())
        lines = list(empty_file)
        self.assertEqual([], lines)

    def test_iter_many(self):
        """Iterating gives back each newline-terminated line, in order."""
        content = b'0\n1\nboo!\n'
        many_file = response.ResponseFile('many', BytesIO(content))
        lines = list(many_file)
        self.assertEqual([b'0\n', b'1\n', b'boo!\n'], lines)
78
94
class TestHTTPConnection(tests.TestCase):
80
96
def test_cleanup_pipe(self):
81
sock = ReadSocket("""HTTP/1.1 200 OK\r
97
sock = ReadSocket(b"""HTTP/1.1 200 OK\r
82
98
Content-Type: text/plain; charset=UTF-8\r
92
108
# Now, get the response
93
109
resp = conn.getresponse()
94
110
# Read part of the response
95
self.assertEquals('0123456789\n', resp.read(11))
111
self.assertEqual(b'0123456789\n', resp.read(11))
96
112
# Override the threshold to force the warning emission
97
113
conn._range_warning_thresold = 6 # There are 7 bytes pending
98
114
conn.cleanup_pipe()
106
122
# which offsets are easy to calculate for test writers. It's used as a
107
123
# building block with slight variations but basically 'a' is the first char
108
124
# of the range and 'z' is the last.
109
alpha = 'abcdefghijklmnopqrstuvwxyz'
125
alpha = b'abcdefghijklmnopqrstuvwxyz'
111
127
def test_can_read_at_first_access(self):
112
128
"""Test that the just created file can be read."""
113
self.assertEquals(self.alpha, self._file.read())
129
self.assertEqual(self.alpha, self._file.read())
115
131
def test_seek_read(self):
116
132
"""Test seek/read inside the range."""
118
134
start = self.first_range_start
119
135
# Before any use, tell() should be at the range start
120
self.assertEquals(start, f.tell())
136
self.assertEqual(start, f.tell())
121
137
cur = start # For an overall offset assertion
122
138
f.seek(start + 3)
124
self.assertEquals('def', f.read(3))
140
self.assertEqual(b'def', f.read(3))
125
141
cur += len('def')
128
self.assertEquals('klmn', f.read(4))
144
self.assertEqual(b'klmn', f.read(4))
129
145
cur += len('klmn')
130
146
# read(0) in the middle of a range
131
self.assertEquals('', f.read(0))
147
self.assertEqual(b'', f.read(0))
135
self.assertEquals(here, f.tell())
136
self.assertEquals(cur, f.tell())
151
self.assertEqual(here, f.tell())
152
self.assertEqual(cur, f.tell())
138
154
def test_read_zero(self):
140
start = self.first_range_start
141
self.assertEquals('', f.read(0))
156
self.assertEqual(b'', f.read(0))
143
self.assertEquals('', f.read(0))
158
self.assertEqual(b'', f.read(0))
145
160
def test_seek_at_range_end(self):
149
164
def test_read_at_range_end(self):
150
165
"""Test read behaviour at range end."""
152
self.assertEquals(self.alpha, f.read())
153
self.assertEquals('', f.read(0))
167
self.assertEqual(self.alpha, f.read())
168
self.assertEqual(b'', f.read(0))
154
169
self.assertRaises(errors.InvalidRange, f.read, 1)
156
171
def test_unbounded_read_after_seek(self):
159
174
# Should not cross ranges
160
self.assertEquals('yz', f.read())
175
self.assertEqual(b'yz', f.read())
162
177
def test_seek_backwards(self):
188
203
The semantic is unclear in case of multiple ranges. Seeking from end
189
204
exists only for the http transports, cannot be used if the file size is
190
unknown and is not used in bzrlib itself. This test must be (and is)
205
unknown and is not used in breezy itself. This test must be (and is)
191
206
overridden by daughter classes.
193
208
Reading from end makes sense only when a range has been requested from
207
222
super(TestRangeFileSizeUnknown, self).setUp()
208
223
self._file = response.RangeFile('Whole_file_size_known',
209
StringIO(self.alpha))
210
225
# We define no range, relying on RangeFile to provide default values
211
226
self.first_range_start = 0 # It's the whole file
220
235
def test_read_at_range_end(self):
221
236
"""Test read behaviour at range end."""
223
self.assertEquals(self.alpha, f.read())
224
self.assertEquals('', f.read(0))
225
self.assertEquals('', f.read(1))
238
self.assertEqual(self.alpha, f.read())
239
self.assertEqual(b'', f.read(0))
240
self.assertEqual(b'', f.read(1))
228
243
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
232
247
super(TestRangeFileSizeKnown, self).setUp()
233
248
self._file = response.RangeFile('Whole_file_size_known',
234
StringIO(self.alpha))
235
250
self._file.set_range(0, len(self.alpha))
236
251
self.first_range_start = 0 # It's the whole file
243
258
super(TestRangeFileSingleRange, self).setUp()
244
259
self._file = response.RangeFile('Single_range_file',
245
StringIO(self.alpha))
246
261
self.first_range_start = 15
247
262
self._file.set_range(self.first_range_start, len(self.alpha))
271
286
# in HTTP response headers and the boundary lines that separate
272
287
# multipart content.
274
boundary = "separation"
289
boundary = b"separation"
277
292
super(TestRangeFileMultipleRanges, self).setUp()
279
294
boundary = self.boundary
282
297
self.first_range_start = 25
283
298
file_size = 200 # big enough to encompass all ranges
284
299
for (start, part) in [(self.first_range_start, self.alpha),
291
306
content += self._boundary_line()
293
308
self._file = response.RangeFile('Multiple_ranges_file',
295
310
self.set_file_boundary()
297
312
def _boundary_line(self):
298
313
"""Helper to build the formatted boundary line."""
299
return '--' + self.boundary + '\r\n'
314
return b'--' + self.boundary + b'\r\n'
301
316
def set_file_boundary(self):
302
317
# Ranges are set by decoding the range headers, the RangeFile user is
305
320
# which is part of the Content-Type header).
306
321
self._file.set_boundary(self.boundary)
308
def _multipart_byterange(self, data, offset, boundary, file_size='*'):
323
def _multipart_byterange(self, data, offset, boundary, file_size=b'*'):
309
324
"""Encode a part of a file as a multipart/byterange MIME type.
311
326
When a range request is issued, the HTTP response body can be
327
342
# A range is described by a set of headers, but only 'Content-Range' is
328
343
# required for our implementation (TestHandleResponse below will
329
344
# exercise ranges with multiple or missing headers)
330
range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
345
if isinstance(file_size, int):
346
file_size = b'%d' % file_size
347
range += b'Content-Range: bytes %d-%d/%s\r\n' % (offset,
334
351
# Finally the raw bytes
338
355
def test_read_all_ranges(self):
340
self.assertEquals(self.alpha, f.read()) # Read first range
357
self.assertEqual(self.alpha, f.read()) # Read first range
341
358
f.seek(100) # Trigger the second range recognition
342
self.assertEquals(self.alpha, f.read()) # Read second range
343
self.assertEquals(126, f.tell())
359
self.assertEqual(self.alpha, f.read()) # Read second range
360
self.assertEqual(126, f.tell())
344
361
f.seek(126) # Start of third range which is also the current pos !
345
self.assertEquals('A', f.read(1))
362
self.assertEqual(b'A', f.read(1))
347
self.assertEquals('LMN', f.read(3))
364
self.assertEqual(b'LMN', f.read(3))
349
366
def test_seek_from_end(self):
350
367
"""See TestRangeFileMixin.test_seek_from_end."""
357
self.assertEquals('yz', f.read())
374
self.assertEqual(b'yz', f.read())
358
375
self.assertRaises(errors.InvalidRange, f.seek, -2, 2)
360
377
def test_seek_into_void(self):
372
389
def test_seek_across_ranges(self):
374
start = self.first_range_start
375
391
f.seek(126) # skip the two first ranges
376
self.assertEquals('AB', f.read(2))
392
self.assertEqual(b'AB', f.read(2))
378
394
def test_checked_read_dont_overflow_buffers(self):
380
start = self.first_range_start
381
396
# We force a very low value to exercise all code paths in _checked_read
382
397
f._discarded_buf_size = 8
383
398
f.seek(126) # skip the two first ranges
384
self.assertEquals('AB', f.read(2))
399
self.assertEqual(b'AB', f.read(2))
386
401
def test_seek_twice_between_ranges(self):
400
415
def test_read_at_range_end(self):
402
self.assertEquals(self.alpha, f.read())
403
self.assertEquals(self.alpha, f.read())
404
self.assertEquals(self.alpha.upper(), f.read())
417
self.assertEqual(self.alpha, f.read())
418
self.assertEqual(self.alpha, f.read())
419
self.assertEqual(self.alpha.upper(), f.read())
405
420
self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
423
438
# The boundary as it appears in boundary lines
424
439
# IIS 6 and 7 use this value
425
_boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
426
boundary = '<' + _boundary_trimmed + '>'
440
_boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
441
boundary = b'<' + _boundary_trimmed + b'>'
428
443
def set_file_boundary(self):
429
444
# Emulate broken rfc822.unquote() here by removing angles
445
460
def test_range_syntax(self):
446
461
"""Test the Content-Range scanning."""
448
f = response.RangeFile('foo', StringIO())
463
f = response.RangeFile('foo', BytesIO())
450
465
def ok(expected, header_value):
451
466
f.set_range_from_header(header_value)
452
467
# Slightly peek under the covers to get the size
453
self.assertEquals(expected, (f.tell(), f._size))
468
self.assertEqual(expected, (f.tell(), f._size))
455
470
ok((1, 10), 'bytes 1-10/11')
456
471
ok((1, 10), 'bytes 1-10/*')
474
489
# Taken from real request responses
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
490
_full_text_response = (200, b"""HTTP/1.1 200 OK\r
476
491
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
477
492
Server: Apache/2.0.54 (Fedora)\r
478
493
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
482
497
Connection: close\r
483
498
Content-Type: text/plain; charset=UTF-8\r
485
""", """Bazaar-NG meta directory, format 1
500
""", b"""Bazaar-NG meta directory, format 1
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
504
_single_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
490
505
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
506
Server: Apache/2.0.54 (Fedora)\r
492
507
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
497
512
Connection: close\r
498
513
Content-Type: text/plain; charset=UTF-8\r
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
501
516
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
519
_single_range_no_content_type = (206, b"""HTTP/1.1 206 Partial Content\r
505
520
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
506
521
Server: Apache/2.0.54 (Fedora)\r
507
522
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
511
526
Content-Range: bytes 100-199/93890\r
512
527
Connection: close\r
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
529
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
530
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
533
_multipart_range_response = (206, b"""HTTP/1.1 206 Partial Content\r
519
534
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
520
535
Server: Apache/2.0.54 (Fedora)\r
521
536
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
525
540
Connection: close\r
526
541
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
528
\r""", """--418470f848b63279b\r
543
\r""", b"""--418470f848b63279b\r
529
544
Content-type: text/plain; charset=UTF-8\r
530
545
Content-range: bytes 0-254/93890\r
568
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
583
_multipart_squid_range_response = (206, b"""HTTP/1.0 206 Partial Content\r
569
584
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
570
585
Server: Apache/2.2.2 (Unix) DAV/2\r
571
586
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
600
615
# This is made up
601
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
616
_full_text_response_no_content_type = (200, b"""HTTP/1.1 200 OK\r
602
617
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
603
618
Server: Apache/2.0.54 (Fedora)\r
604
619
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
607
622
Content-Length: 35\r
608
623
Connection: close\r
610
""", """Bazaar-NG meta directory, format 1
625
""", b"""Bazaar-NG meta directory, format 1
614
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
629
_full_text_response_no_content_length = (200, b"""HTTP/1.1 200 OK\r
615
630
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
616
631
Server: Apache/2.0.54 (Fedora)\r
617
632
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
620
635
Connection: close\r
621
636
Content-Type: text/plain; charset=UTF-8\r
623
""", """Bazaar-NG meta directory, format 1
638
""", b"""Bazaar-NG meta directory, format 1
627
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
642
_single_range_no_content_range = (206, b"""HTTP/1.1 206 Partial Content\r
628
643
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
629
644
Server: Apache/2.0.54 (Fedora)\r
630
645
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
633
648
Content-Length: 100\r
634
649
Connection: close\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
651
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06
637
652
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
640
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
655
_single_range_response_truncated = (206, b"""HTTP/1.1 206 Partial Content\r
641
656
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
642
657
Server: Apache/2.0.54 (Fedora)\r
643
658
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
648
663
Connection: close\r
649
664
Content-Type: text/plain; charset=UTF-8\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
654
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
666
""", b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
669
_invalid_response = (444, b"""HTTP/1.1 444 Bad Response\r
655
670
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
656
671
Connection: close\r
657
672
Content-Type: text/html; charset=iso-8859-1\r
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
674
""", b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
661
676
<title>404 Not Found</title>
670
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
685
_multipart_no_content_range = (206, b"""HTTP/1.0 206 Partial Content\r
671
686
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
672
687
Content-Length: 598\r
676
691
--THIS_SEPARATES\r
677
692
Content-Type: text/plain\r
684
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
699
_multipart_no_boundary = (206, b"""HTTP/1.0 206 Partial Content\r
685
700
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
686
701
Content-Length: 598\r
690
705
--THIS_SEPARATES\r
691
706
Content-Type: text/plain\r
692
707
Content-Range: bytes 0-18/18672\r
701
716
class TestHandleResponse(tests.TestCase):
703
718
def _build_HTTPMessage(self, raw_headers):
704
status_and_headers = StringIO(raw_headers)
719
status_and_headers = BytesIO(raw_headers)
705
720
# Get rid of the status line
706
721
status_and_headers.readline()
707
msg = httplib.HTTPMessage(status_and_headers)
722
msg = http_client.HTTPMessage(status_and_headers)
710
725
def get_response(self, a_response):
712
727
code, raw_headers, body = a_response
713
728
msg = self._build_HTTPMessage(raw_headers)
714
729
return response.handle_response('http://foo', code, msg,
715
StringIO(a_response[2]))
730
BytesIO(a_response[2]))
717
732
def test_full_text(self):
718
733
out = self.get_response(_full_text_response)
719
# It is a StringIO from the original data
734
# It is a BytesIO from the original data
720
735
self.assertEqual(_full_text_response[2], out.read())
722
737
def test_single_range(self):
764
779
# We should not require Content-Type for a full response
765
780
code, raw_headers, body = _full_text_response_no_content_type
766
781
msg = self._build_HTTPMessage(raw_headers)
767
out = response.handle_response('http://foo', code, msg, StringIO(body))
782
out = response.handle_response('http://foo', code, msg, BytesIO(body))
768
783
self.assertEqual(body, out.read())
770
785
def test_full_text_no_content_length(self):
771
786
code, raw_headers, body = _full_text_response_no_content_length
772
787
msg = self._build_HTTPMessage(raw_headers)
773
out = response.handle_response('http://foo', code, msg, StringIO(body))
788
out = response.handle_response('http://foo', code, msg, BytesIO(body))
774
789
self.assertEqual(body, out.read())
776
791
def test_missing_content_range(self):
778
793
msg = self._build_HTTPMessage(raw_headers)
779
794
self.assertRaises(errors.InvalidHttpResponse,
780
795
response.handle_response,
781
'http://bogus', code, msg, StringIO(body))
796
'http://bogus', code, msg, BytesIO(body))
783
798
def test_multipart_no_content_range(self):
784
799
code, raw_headers, body = _multipart_no_content_range
785
800
msg = self._build_HTTPMessage(raw_headers)
786
801
self.assertRaises(errors.InvalidHttpResponse,
787
802
response.handle_response,
788
'http://bogus', code, msg, StringIO(body))
803
'http://bogus', code, msg, BytesIO(body))
790
805
def test_multipart_no_boundary(self):
791
806
out = self.get_response(_multipart_no_boundary)
803
tests.TestCase.setUp(self)
818
super(TestRangeFileSizeReadLimited, self).setUp()
804
819
# create a test datablock larger than _max_read_size.
805
820
chunk_size = response.RangeFile._max_read_size
806
test_pattern = '0123456789ABCDEF'
807
self.test_data = test_pattern * (3 * chunk_size / len(test_pattern))
821
test_pattern = b'0123456789ABCDEF'
822
self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
808
823
self.test_data_len = len(self.test_data)
810
825
def test_max_read_size(self):