/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests from HTTP response parsing."""
18
19
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
20
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
21
1786.1.13 by John Arbash Meinel
Found a few bugs in error handling code, updated tests
22
from bzrlib import errors
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
23
from bzrlib.transport import http
1786.1.13 by John Arbash Meinel
Found a few bugs in error handling code, updated tests
24
from bzrlib.transport.http import response
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
25
from bzrlib.tests import TestCase
26
27
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
28
class TestRangeFileAccess(TestCase):
    """Test RangeFile.

    All the files are built over the 26 lowercase letters (the multi-range
    file also carries an uppercase copy) so the expected content of a read
    can be derived from the offset inside the range.
    """

    def setUp(self):
        """Define the alphabet and the list of RangeFile builders."""
        TestCase.setUp(self)
        self.alpha = 'abcdefghijklmnopqrstuvwxyz'
        # Each file is defined as a tuple (builder, start), 'builder' is a
        # callable returning a RangeFile and 'start' the start of the first (or
        # unique) range.
        self.files = [(self._file_size_unknown, 0),
                      (self._file_size_known, 0),
                      (self._file_single_range, 10),
                      (self._file_multi_ranges, 10),]

    def _file_size_unknown(self):
        """Return a RangeFile over the alphabet with no range set."""
        return response.RangeFile('Whole_file_size_unknown',
                                  StringIO(self.alpha))

    def _file_size_known(self):
        """Return a RangeFile whose range starts at 0 and covers the alphabet."""
        alpha = self.alpha
        f = response.RangeFile('Whole_file_size_known', StringIO(alpha))
        f.set_range(0, len(alpha))
        return f

    def _file_single_range(self):
        """Return a RangeFile whose single range starts at offset 10."""
        alpha = self.alpha
        f = response.RangeFile('Single_range_file', StringIO(alpha))
        f.set_range(10, len(alpha))
        return f

    def _file_multi_ranges(self):
        """Return a RangeFile over a multipart body containing three ranges.

        The ranges start at 10, 100 and 126 (the last two are contiguous); the
        third one contains the uppercase alphabet.
        """
        alpha = self.alpha

        boundary = 'separation'
        bline = '--' + boundary + '\r\n'
        file_size = 200
        # Collect the pieces in a list and join them once at the end.
        content = [bline]
        for (start, part) in [(10, alpha), (100, alpha), (126, alpha.upper())]:
            plen = len(part)
            content.append('Content-Range: bytes %d-%d/%d\r\n'
                           % (start, start + plen - 1, file_size))
            content.append('\r\n')
            content.append(part)
            content.append(bline)

        data = ''.join(content)
        f = response.RangeFile('Multiple_ranges_file', StringIO(data))
        # Ranges are set by decoding the headers
        f.set_boundary(boundary)
        return f

    def _check_accesses_inside_range(self, f, start=0):
        """Read the whole alphabet from 'f', which must be positioned at 'start'.

        Mixes sized reads, one forward seek and a final unbounded read.
        """
        self.assertEqual(start, f.tell())
        self.assertEqual('abc', f.read(3))
        self.assertEqual('def', f.read(3))
        self.assertEqual(start + 6, f.tell())
        f.seek(start + 10)
        self.assertEqual('klm', f.read(3))
        self.assertEqual('no', f.read(2))
        self.assertEqual(start + 15, f.tell())
        # Unbounded read, should not cross range
        self.assertEqual('pqrstuvwxyz', f.read())

    def test_valid_accesses(self):
        """Test valid accesses: inside one or more ranges"""
        for builder, start in self.files[:3]:
            self._check_accesses_inside_range(builder(), start)

        f = self._file_multi_ranges()
        self._check_accesses_inside_range(f, start=10)
        f.seek(100) # Will trigger the decoding and setting of the second range
        self._check_accesses_inside_range(f, 100)

        f = self._file_multi_ranges()
        f.seek(10)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

        f = self._file_multi_ranges()
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100)
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))

    def _check_file_boundaries(self, f, start=0):
        """Check that reading or seeking past the end of the range fails."""
        f.seek(start)
        self.assertRaises(errors.InvalidRange, f.read, 27)
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange, f.seek, start + 27)

    def _check_beyond_range(self, builder, start):
        """Check that a read starting inside the range can't run past its end."""
        f = builder()
        f.seek(start + 20)
        # Will try to read past the end of the range
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def _check_seek_backwards(self, f, start=0):
        """Check that seeking to an offset before the current one fails."""
        f.read(start + 12)
        # Can't seek backwards
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_invalid_accesses(self):
        """Test errors triggered by invalid accesses."""

        f = self._file_size_unknown()
        self.assertRaises(errors.InvalidRange, f.seek, -1, 2)

        for builder, start in self.files:
            self._check_seek_backwards(builder(), start)

        for builder, start in self.files[1:3]:
            self._check_file_boundaries(builder(), start)

        f = self._file_multi_ranges()
        self._check_accesses_inside_range(f, start=10)
        f.seek(40) # Will trigger the decoding and setting of the second range
        self.assertEqual(100, f.tell())
        self._check_accesses_inside_range(f, 100)

        self._check_beyond_range(self._file_single_range, start=10)
        self._check_beyond_range(self._file_multi_ranges, start=10)

        f = self._file_multi_ranges()
        f.seek(40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, 41)

        f = self._file_multi_ranges()
        # We can seek across ranges but not beyond
        self.assertRaises(errors.InvalidRange, f.read, 127)
170
171
172
class TestRanges(TestCase):
    """Test the parsing of range header values by RangeFile."""

    def test_range_syntax(self):
        """Valid values set position and size, invalid ones are rejected."""
        rf = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            # 'expected' is the (position, size) pair the header should yield.
            rf.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (rf.tell(), rf._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/xxx') # We don't check total (xxx)

        def nok(header_value):
            # The header must be refused as a whole.
            self.assertRaises(errors.InvalidHttpRange,
                              rf.set_range_from_header, header_value)

        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
198
199
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
200
_full_text_response = (200, """HTTP/1.1 200 OK\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
201
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
202
Server: Apache/2.0.54 (Fedora)\r
203
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
204
ETag: "56691-23-38e9ae00"\r
205
Accept-Ranges: bytes\r
206
Content-Length: 35\r
207
Connection: close\r
208
Content-Type: text/plain; charset=UTF-8\r
209
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
210
""", """Bazaar-NG meta directory, format 1
211
""")
212
213
1786.1.26 by John Arbash Meinel
Update and test handle_response.
214
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
215
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
216
Server: Apache/2.0.54 (Fedora)\r
217
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
218
ETag: "238a3c-16ec2-805c5540"\r
219
Accept-Ranges: bytes\r
220
Content-Length: 100\r
1786.1.26 by John Arbash Meinel
Update and test handle_response.
221
Content-Range: bytes 100-199/93890\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
222
Connection: close\r
223
Content-Type: text/plain; charset=UTF-8\r
224
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
225
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
1786.1.26 by John Arbash Meinel
Update and test handle_response.
226
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
227
228
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
229
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
230
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
231
Server: Apache/2.0.54 (Fedora)\r
232
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
233
ETag: "238a3c-16ec2-805c5540"\r
234
Accept-Ranges: bytes\r
235
Content-Length: 100\r
236
Content-Range: bytes 100-199/93890\r
237
Connection: close\r
238
\r
239
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
240
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
241
242
1786.1.26 by John Arbash Meinel
Update and test handle_response.
243
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
244
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
245
Server: Apache/2.0.54 (Fedora)\r
246
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
247
ETag: "238a3c-16ec2-805c5540"\r
248
Accept-Ranges: bytes\r
249
Content-Length: 1534\r
250
Connection: close\r
251
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
252
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
253
\r""", """--418470f848b63279b\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
254
Content-type: text/plain; charset=UTF-8\r
255
Content-range: bytes 0-254/93890\r
256
\r
257
mbp@sourcefrog.net-20050309040815-13242001617e4a06
258
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
259
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
260
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
261
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
262
\r
263
--418470f848b63279b\r
264
Content-type: text/plain; charset=UTF-8\r
265
Content-range: bytes 1000-2049/93890\r
266
\r
267
40-fd4ec249b6b139ab
268
mbp@sourcefrog.net-20050311063625-07858525021f270b
269
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
270
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
271
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
272
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
273
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
274
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
275
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
276
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
277
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
278
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
279
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
280
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
281
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
282
mbp@sourcefrog.net-20050313120651-497bd231b19df600
283
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
284
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
285
mbp@sourcefrog.net-20050314025539-637a636692c055cf
286
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
287
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
288
mbp@source\r
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
289
--418470f848b63279b--\r
290
""")
291
292
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
293
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
294
Server: Apache/2.2.2 (Unix) DAV/2\r
295
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
296
Accept-Ranges: bytes\r
297
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
298
Content-Length: 598\r
299
X-Cache: MISS from localhost.localdomain\r
300
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
301
Proxy-Connection: keep-alive\r
302
\r
303
""",
304
"""\r
305
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
306
Content-Type: text/plain\r
307
Content-Range: bytes 0-99/18672\r
308
\r
309
# bzr knit index 8
310
311
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
312
scott@netsp\r
313
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
314
Content-Type: text/plain\r
315
Content-Range: bytes 300-499/18672\r
316
\r
317
com-20050708231537-2b124b835395399a :
318
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
319
scott@netsplit.com-20050821213706-c86\r
320
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
321
""")
322
323
1786.1.26 by John Arbash Meinel
Update and test handle_response.
324
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
325
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
326
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
327
Server: Apache/2.0.54 (Fedora)\r
328
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
329
ETag: "56691-23-38e9ae00"\r
330
Accept-Ranges: bytes\r
331
Content-Length: 35\r
332
Connection: close\r
333
\r
334
""", """Bazaar-NG meta directory, format 1
335
""")
336
337
338
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
339
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
340
Server: Apache/2.0.54 (Fedora)\r
341
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
342
ETag: "238a3c-16ec2-805c5540"\r
343
Accept-Ranges: bytes\r
344
Content-Length: 100\r
345
Connection: close\r
346
\r
347
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
348
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
349
350
1786.1.26 by John Arbash Meinel
Update and test handle_response.
351
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
352
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
353
Connection: close\r
354
Content-Type: text/html; charset=iso-8859-1\r
355
\r
356
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
357
<html><head>
358
<title>404 Not Found</title>
359
</head><body>
360
<h1>Not Found</h1>
361
<p>I don't know what I'm doing</p>
362
<hr>
363
</body></html>
364
""")
365
366
367
class TestHandleResponse(TestCase):
    """Test response.handle_response against canned server replies.

    Each reply is a (status code, status line + headers, body) tuple.
    """

    def _build_HTTPMessage(self, raw_headers):
        """Return an httplib.HTTPMessage built from 'raw_headers'.

        'raw_headers' starts with the status line, which is skipped.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result."""
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        """A whole-file reply gives back the body unchanged."""
        out = self.get_response(_full_text_response)
        # It is a StringIO from the original data
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        """A single-range reply can be read from the start of its range."""
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        """A single-range reply doesn't need a Content-Type header."""
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_multi_range(self):
        """Both ranges of a multipart reply can be read entirely."""
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        """Both ranges of a reply using a quoted boundary can be read."""
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        """A reply with the 444 status code is refused."""
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        """A whole-file reply without Content-Type gives back the body."""
        # We should not require Content-Type for a full response
        code, raw_headers, body = _full_text_response_no_content_type
        msg = self._build_HTTPMessage(raw_headers)
        out = response.handle_response('http://foo', code, msg, StringIO(body))
        self.assertEqual(body, out.read())

    def test_missing_content_range(self):
        """A 206 reply without a Content-Range header is refused."""
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://nocontent', code, msg, StringIO(body))