/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests from HTTP response parsing."""
18
19
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
20
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
21
1786.1.13 by John Arbash Meinel
Found a few bugs in error handling code, updated tests
22
from bzrlib import errors
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
23
from bzrlib.transport import http
1786.1.13 by John Arbash Meinel
Found a few bugs in error handling code, updated tests
24
from bzrlib.transport.http import response
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
25
from bzrlib.tests import TestCase
26
27
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
28
class TestRangeFileAccess(TestCase):
    """Test RangeFile.

    The same access patterns are exercised against RangeFile objects built
    four ways: whole file with unknown size, whole file with known size, a
    single range and multiple ranges. Every range contains the lowercase
    alphabet.
    """

    def setUp(self):
        """Prepare the alphabet payload and the list of file builders."""
        super(TestRangeFileAccess, self).setUp()
        self.alpha = 'abcdefghijklmnopqrstuvwxyz'
        # Each file is defined as a tuple (builder, start), 'builder' is a
        # callable returning a RangeFile and 'start' the start of the first (or
        # unique) range.
        self.files = [(self._file_size_unknown, 0),
                      (self._file_size_known, 0),
                      (self._file_single_range, 10),
                      (self._file_multi_ranges, 10),]

    def _file_size_unknown(self):
        """Build a RangeFile over the alphabet without setting any range."""
        return response.RangeFile('Whole_file_size_unknown',
                                  StringIO(self.alpha))

    def _file_size_known(self):
        """Build a RangeFile whose range starts at 0 and spans the alphabet."""
        alpha = self.alpha
        f = response.RangeFile('Whole_file_size_known', StringIO(alpha))
        f.set_range(0, len(alpha))
        return f

    def _file_single_range(self):
        """Build a RangeFile whose single range starts at offset 10."""
        alpha = self.alpha
        f = response.RangeFile('Single_range_file', StringIO(alpha))
        f.set_range(10, len(alpha))
        return f

    def _file_multi_ranges(self):
        """Build a RangeFile over a two-part multipart body.

        Both parts carry the alphabet; their Content-Range headers place them
        at offsets 10 and 100 of a 200 byte file.
        """
        alpha = self.alpha

        boundary = 'separation'
        bline = '--' + boundary + '\r\n'
        # Collect whole pieces in a list and join them once at the end.
        content = [bline]
        file_size = 200
        for (start, part) in [(10, alpha), (100, alpha)]:
            plen = len(part)
            content.append('Content-Range: bytes %d-%d/%d\r\n'
                           % (start, start + plen - 1, file_size))
            content.append('\r\n')
            content.append(part)
            content.append(bline)

        data = ''.join(content)
        f = response.RangeFile('Multiple_ranges_file', StringIO(data))
        # Ranges are set by decoding the headers
        f.set_boundary(boundary)
        return f

    def _check_accesses_inside_range(self, f, start=0):
        """Check tell(), read() and seek() inside a range holding the alphabet.

        :param f: The file to check, positioned at the start of the range.
        :param start: The offset at which the range starts.
        """
        self.assertEqual(start, f.tell())
        self.assertEqual('abc', f.read(3))
        self.assertEqual('def', f.read(3))
        self.assertEqual(start + 6, f.tell())
        f.seek(start + 10)
        self.assertEqual('klm', f.read(3))
        self.assertEqual('no', f.read(2))
        self.assertEqual(start + 15, f.tell())
        # Unbounded read, should not cross range
        self.assertEqual('pqrstuvwxyz', f.read())

    def test_valid_accesses(self):
        """Test valid accesses: inside one or more ranges"""
        for builder, start in self.files[:3]:
            self._check_accesses_inside_range(builder(), start)

        f = self._file_multi_ranges()
        self._check_accesses_inside_range(f, start=10)
        f.seek(100) # Will trigger the decoding and setting of the second range
        self._check_accesses_inside_range(f, 100)

        f = self._file_multi_ranges()
        f.seek(10)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(126)

    def _check_file_boundaries(self, f, start=0):
        """Check that reading or seeking past the end of the range fails.

        :param f: The file to check.
        :param start: The offset at which the range starts.
        """
        f.seek(start)
        self.assertRaises(errors.InvalidRange, f.read, 27)
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange, f.seek, start + 27)

    def _check_beyond_range(self, builder, start):
        """Check that a read running past the end of the range fails.

        :param builder: A callable returning the file to check.
        :param start: The offset at which the range starts.
        """
        f = builder()
        f.seek(start + 20)
        # Will try to read past the end of the range
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def _check_seek_backwards(self, f, start=0):
        """Check that seeking to an already consumed offset fails.

        :param f: The file to check.
        :param start: The offset at which the range starts.
        """
        f.read(start + 12)
        # Can't seek backwards
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_invalid_accesses(self):
        """Test errors triggered by invalid accesses."""

        f = self._file_size_unknown()
        self.assertRaises(errors.InvalidRange, f.seek, -1, 2)

        for builder, start in self.files:
            self._check_seek_backwards(builder(), start)

        for builder, start in self.files[1:3]:
            self._check_file_boundaries(builder(), start)

        f = self._file_multi_ranges()
        self._check_accesses_inside_range(f, start=10)
        f.seek(40) # Will trigger the decoding and setting of the second range
        self.assertEqual(100, f.tell())
        self._check_accesses_inside_range(f, 100)

        self._check_beyond_range(self._file_single_range, start=10)
        self._check_beyond_range(self._file_multi_ranges, start=10)

        f = self._file_multi_ranges()
        f.seek(40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, 41)

        f = self._file_multi_ranges()
        # We can seek across ranges but not beyond
        self.assertRaises(errors.InvalidRange, f.read, 127)
162
163
164
class TestRanges(TestCase):
    """Test how RangeFile decodes Content-Range header values."""

    def test_range_syntax(self):
        """Well-formed values set start and size, malformed ones raise."""

        rf = response.RangeFile('foo', None)

        def ok(expected, header_value):
            # 'expected' is the (start, size) pair the header should yield
            rf.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (rf.tell(), rf._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/xxx') # We don't check total (xxx)

        def nok(header_value):
            self.assertRaises(errors.InvalidHttpRange,
                              rf.set_range_from_header, header_value)

        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
190
191
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
192
# (status code, status line + raw headers, body) of a complete 200 response.
# The body is exactly the 35 bytes announced by Content-Length.
_full_text_response = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Content-Length: 35\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """Bazaar-NG meta directory, format 1
""")
204
205
1786.1.26 by John Arbash Meinel
Update and test handle_response.
206
# (status code, status line + raw headers, body) of a 206 response carrying
# the single range 100-199; the body is the 100 bytes of that range.
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
219
220
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
221
# (status code, status line + raw headers, body) of a single-range 206
# response whose headers carry no Content-Type.
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
233
234
1786.1.26 by John Arbash Meinel
Update and test handle_response.
235
# (status code, status line + raw headers, body) of a multipart/byteranges
# 206 response. The body holds two parts, for ranges 0-254 and 1000-2049,
# whose payloads are 255 and 1050 bytes long respectively.
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 1534\r
Connection: close\r
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
\r
\r""", """--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 0-254/93890\r
\r
mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
\r
--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 1000-2049/93890\r
\r
40-fd4ec249b6b139ab
mbp@sourcefrog.net-20050311063625-07858525021f270b
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
mbp@sourcefrog.net-20050313120651-497bd231b19df600
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
mbp@sourcefrog.net-20050314025539-637a636692c055cf
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
mbp@source\r
--418470f848b63279b--\r
""")
283
284
# (status code, status line + raw headers, body) of a multipart/byteranges
# 206 response whose boundary is given as a quoted string. The body holds two
# parts, for ranges 0-99 and 300-499.
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
Server: Apache/2.2.2 (Unix) DAV/2\r
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
Accept-Ranges: bytes\r
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
Content-Length: 598\r
X-Cache: MISS from localhost.localdomain\r
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
Proxy-Connection: keep-alive\r
\r
""",
"""\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 0-99/18672\r
\r
# bzr knit index 8

scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
scott@netsp\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 300-499/18672\r
\r
com-20050708231537-2b124b835395399a :
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
scott@netsplit.com-20050821213706-c86\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
""")
314
315
1786.1.26 by John Arbash Meinel
Update and test handle_response.
316
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
317
# (status code, status line + raw headers, body) of a complete 200 response
# whose headers carry no Content-Type.
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Content-Length: 35\r
Connection: close\r
\r
""", """Bazaar-NG meta directory, format 1
""")
328
329
330
# (status code, status line + raw headers, body) of a 206 response whose
# headers carry neither Content-Range nor Content-Type.
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Connection: close\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
341
342
1786.1.26 by John Arbash Meinel
Update and test handle_response.
343
# (status code, status line + raw headers, body) of a response using the
# status code 444 together with an HTML error page as body.
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Connection: close\r
Content-Type: text/html; charset=iso-8859-1\r
\r
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>404 Not Found</title>
</head><body>
<h1>Not Found</h1>
<p>I don't know what I'm doing</p>
<hr>
</body></html>
""")
357
358
359
class TestHandleResponse(TestCase):
    """Test response.handle_response against canned HTTP responses.

    Each canned response is a (code, raw_headers, body) tuple where
    'raw_headers' starts with the HTTP status line.
    """

    def _build_HTTPMessage(self, raw_headers):
        """Build an httplib.HTTPMessage from a status line plus headers.

        :param raw_headers: The status line followed by the header lines.
        :return: The HTTPMessage built from the header lines only.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: Whatever response.handle_response returns for it.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        """The body of a full response can be read back unchanged."""
        out = self.get_response(_full_text_response)
        # It is a StringIO from the original data
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        """The body of a single range is readable from the range start."""
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        """A single range is readable even without a Content-Type header."""
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_multi_range(self):
        """Both ranges of a multipart response can be sought to and read."""
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        """A multipart response with a quoted boundary can be read."""
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        """An unexpected status code raises InvalidHttpResponse."""
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        """A full response is readable even without a Content-Type header."""
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_missing_content_range(self):
        """A 206 response without Content-Range raises InvalidHttpResponse."""
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://nocontent', code, msg, StringIO(body))