/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Date: 2010-05-05 00:05:29 UTC
  • mto: This revision was merged to the branch mainline in revision 5206.
  • Revision ID: robertc@robertcollins.net-20100505000529-ltmllyms5watqj5u
Make 'pydoc bzrlib.tests.build_tree_shape' useful.

@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008-2011 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,28 +18,33 @@
 
 import zlib
 
-from bzrlib import (
-    btree_index,
-    groupcompress,
+from .. import (
+    config,
     errors,
-    index as _mod_index,
     osutils,
     tests,
    trace,
+    )
+from ..bzr import (
+    btree_index,
+    groupcompress,
+    knit,
+    index as _mod_index,
     versionedfile,
     )
-from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
-
-
-def load_tests(standard_tests, module, loader):
-    """Parameterize tests for all versions of groupcompress."""
-    to_adapt, result = tests.split_suite_by_condition(
-        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
+from ..osutils import sha_string
+from .test__groupcompress import compiled_groupcompress_feature
+from .scenarios import load_tests_apply_scenarios
+
+
+def group_compress_implementation_scenarios():
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
     if compiled_groupcompress_feature.available():
         scenarios.append(('C',
-            {'compressor': groupcompress.PyrexGroupCompressor}))
-    return tests.multiply_tests(to_adapt, scenarios, result)
+                          {'compressor': groupcompress.PyrexGroupCompressor}))
+    return scenarios
+
+
+load_tests = load_tests_apply_scenarios
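Note on this hunk: the module-level load_tests hook that used tests.multiply_tests to run TestAllGroupCompressors once per compressor is replaced by the declarative scenarios machinery (load_tests_apply_scenarios plus a scenarios attribute on the test class). A minimal sketch of that pattern, assuming the breezy package layout; the class and values below are illustrative, not part of this branch:

    # Sketch only: how scenario-based test parameterization is wired up.
    from breezy.tests import TestCase
    from breezy.tests.scenarios import load_tests_apply_scenarios

    # Module-level hook picked up by the test loader.
    load_tests = load_tests_apply_scenarios


    class TestSomething(TestCase):

        # One copy of each test runs per scenario; the dict entries are set
        # as attributes on the cloned test case.
        scenarios = [
            ('python', {'impl_name': 'python'}),
            ('C', {'impl_name': 'pyrex'}),
            ]

        impl_name = None  # set by the scenario

        def test_impl_selected(self):
            self.assertTrue(self.impl_name is not None)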
51
46
 
52
47
 
53
48
class TestGroupCompressor(tests.TestCase):
54
49
 
55
50
    def _chunks_to_repr_lines(self, chunks):
56
 
        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))
 
51
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
57
52
 
58
53
    def assertEqualDiffEncoded(self, expected, actual):
59
54
        """Compare the actual content to the expected content.
71
66
class TestAllGroupCompressors(TestGroupCompressor):
72
67
    """Tests for GroupCompressor"""
73
68
 
74
 
    scenarios = group_compress_implementation_scenarios()
75
 
    compressor = None  # Set by scenario
 
69
    compressor = None # Set by multiply_tests
76
70
 
77
71
    def test_empty_delta(self):
78
72
        compressor = self.compressor()
82
76
        # diff against NUKK
83
77
        compressor = self.compressor()
84
78
        sha1, start_point, end_point, _ = compressor.compress(('label',),
85
 
                                                              b'strange\ncommon\n', None)
86
 
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
87
 
        expected_lines = b'f\x0fstrange\ncommon\n'
88
 
        self.assertEqual(expected_lines, b''.join(compressor.chunks))
 
79
            'strange\ncommon\n', None)
 
80
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
 
81
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
 
82
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
89
83
        self.assertEqual(0, start_point)
90
 
        self.assertEqual(len(expected_lines), end_point)
 
84
        self.assertEqual(sum(map(len, expected_lines)), end_point)
91
85
 
92
86
    def test_empty_content(self):
93
87
        compressor = self.compressor()
94
88
        # Adding empty bytes should return the 'null' record
95
89
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
96
 
                                                                 b'', None)
 
90
                                                                 '', None)
97
91
        self.assertEqual(0, start_point)
98
92
        self.assertEqual(0, end_point)
99
93
        self.assertEqual('fulltext', kind)
101
95
        self.assertEqual(0, compressor.endpoint)
102
96
        self.assertEqual([], compressor.chunks)
103
97
        # Even after adding some content
104
 
        compressor.compress(('content',), b'some\nbytes\n', None)
 
98
        compressor.compress(('content',), 'some\nbytes\n', None)
105
99
        self.assertTrue(compressor.endpoint > 0)
106
100
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
107
 
                                                                 b'', None)
 
101
                                                                 '', None)
108
102
        self.assertEqual(0, start_point)
109
103
        self.assertEqual(0, end_point)
110
104
        self.assertEqual('fulltext', kind)
115
109
        # reading something that is in the compressor stream already.
116
110
        compressor = self.compressor()
117
111
        sha1_1, _, _, _ = compressor.compress(('label',),
118
 
                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
112
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
119
113
        expected_lines = list(compressor.chunks)
120
114
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
121
 
                                                      b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
115
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
122
116
        # get the first out
123
 
        self.assertEqual((b'strange\ncommon long line\n'
124
 
                          b'that needs a 16 byte match\n', sha1_1),
 
117
        self.assertEqual(('strange\ncommon long line\n'
 
118
                          'that needs a 16 byte match\n', sha1_1),
125
119
                         compressor.extract(('label',)))
126
120
        # and the second
127
 
        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
128
 
                          b'different\n', sha1_2),
 
121
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
 
122
                          'different\n', sha1_2),
129
123
                         compressor.extract(('newlabel',)))
130
124
 
131
125
    def test_pop_last(self):
132
126
        compressor = self.compressor()
133
127
        _, _, _, _ = compressor.compress(('key1',),
134
 
                                         b'some text\nfor the first entry\n', None)
 
128
            'some text\nfor the first entry\n', None)
135
129
        expected_lines = list(compressor.chunks)
136
130
        _, _, _, _ = compressor.compress(('key2',),
137
 
                                         b'some text\nfor the second entry\n', None)
 
131
            'some text\nfor the second entry\n', None)
138
132
        compressor.pop_last()
139
133
        self.assertEqual(expected_lines, compressor.chunks)
140
134
 
147
141
    def test_stats(self):
148
142
        compressor = self.compressor()
149
143
        compressor.compress(('label',),
150
 
                            b'strange\n'
151
 
                            b'common very very long line\n'
152
 
                            b'plus more text\n', None)
 
144
                            'strange\n'
 
145
                            'common very very long line\n'
 
146
                            'plus more text\n', None)
153
147
        compressor.compress(('newlabel',),
154
 
                            b'common very very long line\n'
155
 
                            b'plus more text\n'
156
 
                            b'different\n'
157
 
                            b'moredifferent\n', None)
 
148
                            'common very very long line\n'
 
149
                            'plus more text\n'
 
150
                            'different\n'
 
151
                            'moredifferent\n', None)
158
152
        compressor.compress(('label3',),
159
 
                            b'new\n'
160
 
                            b'common very very long line\n'
161
 
                            b'plus more text\n'
162
 
                            b'different\n'
163
 
                            b'moredifferent\n', None)
 
153
                            'new\n'
 
154
                            'common very very long line\n'
 
155
                            'plus more text\n'
 
156
                            'different\n'
 
157
                            'moredifferent\n', None)
164
158
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
165
159
 
166
160
    def test_two_nosha_delta(self):
167
161
        compressor = self.compressor()
168
162
        sha1_1, _, _, _ = compressor.compress(('label',),
169
 
                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
163
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
170
164
        expected_lines = list(compressor.chunks)
171
165
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
172
 
                                                                b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
173
 
        self.assertEqual(sha_string(b'common long line\n'
174
 
                                    b'that needs a 16 byte match\n'
175
 
                                    b'different\n'), sha1_2)
 
166
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
167
        self.assertEqual(sha_string('common long line\n'
 
168
                                    'that needs a 16 byte match\n'
 
169
                                    'different\n'), sha1_2)
176
170
        expected_lines.extend([
177
171
            # 'delta', delta length
178
 
            b'd\x0f',
 
172
            'd\x0f',
179
173
            # source and target length
180
 
            b'\x36',
 
174
            '\x36',
181
175
            # copy the line common
182
 
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
 
176
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
183
177
            # add the line different, and the trailing newline
184
 
            b'\x0adifferent\n',  # insert 10 bytes
 
178
            '\x0adifferent\n', # insert 10 bytes
185
179
            ])
186
180
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
187
181
        self.assertEqual(sum(map(len, expected_lines)), end_point)
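The expected_lines above spell out the delta encoding these assertions check: after the b'd' marker, its delta-length byte and the combined source/target length byte, the body is a stream of instructions, where a byte with the high bit set introduces a copy (b'\x91' here meaning one offset byte and one length byte follow) and a byte below 0x80 is a literal insert of that many bytes. A rough decoder for just those two instruction shapes, written from the comments in this test rather than from the library source; the real format also allows multi-byte offsets and lengths:

    # Sketch: decode the copy/insert opcodes that appear in expected_lines.
    def decode_simple_delta_instructions(body):
        """Yield ('copy', offset, length) or ('insert', literal) tuples."""
        i = 0
        while i < len(body):
            cmd = body[i]
            i += 1
            if cmd & 0x80:
                # Copy instruction; only the 0x91 form (one offset byte,
                # one length byte) used in these tests is handled here.
                offset, length = body[i], body[i + 1]
                i += 2
                yield ('copy', offset, length)
            else:
                # Insert instruction: cmd literal bytes follow.
                yield ('insert', body[i:i + cmd])
                i += cmd

    # b'\x91\x0a\x2c' -> ('copy', 10, 44); b'\x0adifferent\n' -> ('insert', b'different\n')
    print(list(decode_simple_delta_instructions(b'\x91\x0a\x2c\x0adifferent\n')))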
191
185
        # both parents.
192
186
        compressor = self.compressor()
193
187
        sha1_1, _, _, _ = compressor.compress(('label',),
194
 
                                              b'strange\ncommon very very long line\nwith some extra text\n', None)
 
188
            'strange\ncommon very very long line\nwith some extra text\n', None)
195
189
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
196
 
                                              b'different\nmoredifferent\nand then some more\n', None)
 
190
            'different\nmoredifferent\nand then some more\n', None)
197
191
        expected_lines = list(compressor.chunks)
198
192
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
199
 
                                                                b'new\ncommon very very long line\nwith some extra text\n'
200
 
                                                                b'different\nmoredifferent\nand then some more\n',
201
 
                                                                None)
 
193
            'new\ncommon very very long line\nwith some extra text\n'
 
194
            'different\nmoredifferent\nand then some more\n',
 
195
            None)
202
196
        self.assertEqual(
203
 
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
204
 
                       b'different\nmoredifferent\nand then some more\n'),
 
197
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
198
                       'different\nmoredifferent\nand then some more\n'),
205
199
            sha1_3)
206
200
        expected_lines.extend([
207
201
            # 'delta', delta length
208
 
            b'd\x0b',
 
202
            'd\x0b',
209
203
            # source and target length
210
 
            b'\x5f'
 
204
            '\x5f'
211
205
            # insert new
212
 
            b'\x03new',
 
206
            '\x03new',
213
207
            # Copy of first parent 'common' range
214
 
            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
 
208
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
215
209
            # Copy of second parent 'different' range
216
 
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
 
210
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
217
211
            ])
218
212
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
219
213
        self.assertEqual(sum(map(len, expected_lines)), end_point)
226
220
    def test_stats(self):
227
221
        compressor = self.compressor()
228
222
        compressor.compress(('label',),
229
 
                            b'strange\n'
230
 
                            b'common very very long line\n'
231
 
                            b'plus more text\n', None)
 
223
                            'strange\n'
 
224
                            'common very very long line\n'
 
225
                            'plus more text\n', None)
232
226
        compressor.compress(('newlabel',),
233
 
                            b'common very very long line\n'
234
 
                            b'plus more text\n'
235
 
                            b'different\n'
236
 
                            b'moredifferent\n', None)
 
227
                            'common very very long line\n'
 
228
                            'plus more text\n'
 
229
                            'different\n'
 
230
                            'moredifferent\n', None)
237
231
        compressor.compress(('label3',),
238
 
                            b'new\n'
239
 
                            b'common very very long line\n'
240
 
                            b'plus more text\n'
241
 
                            b'different\n'
242
 
                            b'moredifferent\n', None)
 
232
                            'new\n'
 
233
                            'common very very long line\n'
 
234
                            'plus more text\n'
 
235
                            'different\n'
 
236
                            'moredifferent\n', None)
243
237
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
244
238
 
245
239
    def test_two_nosha_delta(self):
246
240
        compressor = self.compressor()
247
241
        sha1_1, _, _, _ = compressor.compress(('label',),
248
 
                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
242
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
249
243
        expected_lines = list(compressor.chunks)
250
244
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
251
 
                                                                b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
252
 
        self.assertEqual(sha_string(b'common long line\n'
253
 
                                    b'that needs a 16 byte match\n'
254
 
                                    b'different\n'), sha1_2)
 
245
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
246
        self.assertEqual(sha_string('common long line\n'
 
247
                                    'that needs a 16 byte match\n'
 
248
                                    'different\n'), sha1_2)
255
249
        expected_lines.extend([
256
250
            # 'delta', delta length
257
 
            b'd\x0f',
 
251
            'd\x0f',
258
252
            # target length
259
 
            b'\x36',
 
253
            '\x36',
260
254
            # copy the line common
261
 
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
 
255
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
262
256
            # add the line different, and the trailing newline
263
 
            b'\x0adifferent\n',  # insert 10 bytes
 
257
            '\x0adifferent\n', # insert 10 bytes
264
258
            ])
265
259
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
266
260
        self.assertEqual(sum(map(len, expected_lines)), end_point)
270
264
        # both parents.
271
265
        compressor = self.compressor()
272
266
        sha1_1, _, _, _ = compressor.compress(('label',),
273
 
                                              b'strange\ncommon very very long line\nwith some extra text\n', None)
 
267
            'strange\ncommon very very long line\nwith some extra text\n', None)
274
268
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
275
 
                                              b'different\nmoredifferent\nand then some more\n', None)
 
269
            'different\nmoredifferent\nand then some more\n', None)
276
270
        expected_lines = list(compressor.chunks)
277
271
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
278
 
                                                                b'new\ncommon very very long line\nwith some extra text\n'
279
 
                                                                b'different\nmoredifferent\nand then some more\n',
280
 
                                                                None)
 
272
            'new\ncommon very very long line\nwith some extra text\n'
 
273
            'different\nmoredifferent\nand then some more\n',
 
274
            None)
281
275
        self.assertEqual(
282
 
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
283
 
                       b'different\nmoredifferent\nand then some more\n'),
 
276
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
277
                       'different\nmoredifferent\nand then some more\n'),
284
278
            sha1_3)
285
279
        expected_lines.extend([
286
280
            # 'delta', delta length
287
 
            b'd\x0c',
 
281
            'd\x0c',
288
282
            # target length
289
 
            b'\x5f'
 
283
            '\x5f'
290
284
            # insert new
291
 
            b'\x04new\n',
 
285
            '\x04new\n',
292
286
            # Copy of first parent 'common' range
293
 
            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
 
287
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
294
288
            # Copy of second parent 'different' range
295
 
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
 
289
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
296
290
            ])
297
291
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
298
292
        self.assertEqual(sum(map(len, expected_lines)), end_point)
307
301
        for key in sorted(key_to_text):
308
302
            compressor.compress(key, key_to_text[key], None)
309
303
        locs = dict((key, (start, end)) for key, (start, _, end, _)
310
 
                    in compressor.labels_deltas.items())
 
304
                    in compressor.labels_deltas.iteritems())
311
305
        block = compressor.flush()
312
306
        raw_bytes = block.to_bytes()
313
307
        # Go through from_bytes(to_bytes()) so that we start with a compressed
316
310
 
317
311
    def test_from_empty_bytes(self):
318
312
        self.assertRaises(ValueError,
319
 
                          groupcompress.GroupCompressBlock.from_bytes, b'')
 
313
                          groupcompress.GroupCompressBlock.from_bytes, '')
320
314
 
321
315
    def test_from_minimal_bytes(self):
322
316
        block = groupcompress.GroupCompressBlock.from_bytes(
323
 
            b'gcb1z\n0\n0\n')
 
317
            'gcb1z\n0\n0\n')
324
318
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
325
319
        self.assertIs(None, block._content)
326
 
        self.assertEqual(b'', block._z_content)
 
320
        self.assertEqual('', block._z_content)
327
321
        block._ensure_content()
328
 
        self.assertEqual(b'', block._content)
329
 
        self.assertEqual(b'', block._z_content)
330
 
        block._ensure_content()  # Ensure content is safe to call 2x
 
322
        self.assertEqual('', block._content)
 
323
        self.assertEqual('', block._z_content)
 
324
        block._ensure_content() # Ensure content is safe to call 2x
331
325
 
332
326
    def test_from_invalid(self):
333
327
        self.assertRaises(ValueError,
334
328
                          groupcompress.GroupCompressBlock.from_bytes,
335
 
                          b'this is not a valid header')
 
329
                          'this is not a valid header')
336
330
 
337
331
    def test_from_bytes(self):
338
 
        content = (b'a tiny bit of content\n')
 
332
        content = ('a tiny bit of content\n')
339
333
        z_content = zlib.compress(content)
340
334
        z_bytes = (
341
 
            b'gcb1z\n'  # group compress block v1 plain
342
 
            b'%d\n'  # Length of compressed content
343
 
            b'%d\n'  # Length of uncompressed content
344
 
            b'%s'   # Compressed content
 
335
            'gcb1z\n' # group compress block v1 plain
 
336
            '%d\n' # Length of compressed content
 
337
            '%d\n' # Length of uncompressed content
 
338
            '%s'   # Compressed content
345
339
            ) % (len(z_content), len(content), z_content)
346
340
        block = groupcompress.GroupCompressBlock.from_bytes(
347
341
            z_bytes)
353
347
        self.assertEqual(z_content, block._z_content)
354
348
        self.assertEqual(content, block._content)
355
349
 
356
 
    def test_to_chunks(self):
357
 
        content_chunks = [b'this is some content\n',
358
 
                          b'this content will be compressed\n']
359
 
        content_len = sum(map(len, content_chunks))
360
 
        content = b''.join(content_chunks)
361
 
        gcb = groupcompress.GroupCompressBlock()
362
 
        gcb.set_chunked_content(content_chunks, content_len)
363
 
        total_len, block_chunks = gcb.to_chunks()
364
 
        block_bytes = b''.join(block_chunks)
365
 
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
366
 
        self.assertEqual(total_len, len(block_bytes))
367
 
        self.assertEqual(gcb._content_length, content_len)
368
 
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
369
 
                           b'%d\n'  # Length of compressed content
370
 
                           b'%d\n'  # Length of uncompressed content
371
 
                           ) % (gcb._z_content_length, gcb._content_length)
372
 
        # The first chunk should be the header chunk. It is small, fixed size,
373
 
        # and there is no compelling reason to split it up
374
 
        self.assertEqual(expected_header, block_chunks[0])
375
 
        self.assertStartsWith(block_bytes, expected_header)
376
 
        remaining_bytes = block_bytes[len(expected_header):]
377
 
        raw_bytes = zlib.decompress(remaining_bytes)
378
 
        self.assertEqual(content, raw_bytes)
379
 
 
380
350
    def test_to_bytes(self):
381
 
        content = (b'this is some content\n'
382
 
                   b'this content will be compressed\n')
 
351
        content = ('this is some content\n'
 
352
                   'this content will be compressed\n')
383
353
        gcb = groupcompress.GroupCompressBlock()
384
354
        gcb.set_content(content)
385
 
        data = gcb.to_bytes()
 
355
        bytes = gcb.to_bytes()
386
356
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
387
357
        self.assertEqual(gcb._content_length, len(content))
388
 
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
389
 
                           b'%d\n'  # Length of compressed content
390
 
                           b'%d\n'  # Length of uncompressed content
391
 
                           ) % (gcb._z_content_length, gcb._content_length)
392
 
        self.assertStartsWith(data, expected_header)
393
 
        remaining_bytes = data[len(expected_header):]
 
358
        expected_header =('gcb1z\n' # group compress block v1 zlib
 
359
                          '%d\n' # Length of compressed content
 
360
                          '%d\n' # Length of uncompressed content
 
361
                         ) % (gcb._z_content_length, gcb._content_length)
 
362
        self.assertStartsWith(bytes, expected_header)
 
363
        remaining_bytes = bytes[len(expected_header):]
394
364
        raw_bytes = zlib.decompress(remaining_bytes)
395
365
        self.assertEqual(content, raw_bytes)
396
366
 
397
367
        # we should get the same results if using the chunked version
398
368
        gcb = groupcompress.GroupCompressBlock()
399
 
        gcb.set_chunked_content([b'this is some content\n'
400
 
                                 b'this content will be compressed\n'],
401
 
                                len(content))
402
 
        old_data = data
403
 
        data = gcb.to_bytes()
404
 
        self.assertEqual(old_data, data)
 
369
        gcb.set_chunked_content(['this is some content\n'
 
370
                                 'this content will be compressed\n'],
 
371
                                 len(content))
 
372
        old_bytes = bytes
 
373
        bytes = gcb.to_bytes()
 
374
        self.assertEqual(old_bytes, bytes)
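For reference, the serialized block that these from_bytes/to_chunks/to_bytes tests build is just the small text header asserted above followed by the zlib-compressed body. A sketch of pulling such a block apart by hand, mirroring the assertions rather than the library's own parser:

    # Sketch: split a serialized b'gcb1z' group-compress block into its parts.
    import zlib

    def split_gc_block(data):
        """Return (z_len, content_len, decompressed_body) for a gcb1z block."""
        if not data.startswith(b'gcb1z\n'):
            raise ValueError('not a group compress block')
        rest = data[len(b'gcb1z\n'):]
        z_len, rest = rest.split(b'\n', 1)        # length of compressed content
        content_len, body = rest.split(b'\n', 1)  # length of uncompressed content
        z_len, content_len = int(z_len), int(content_len)
        decompressed = zlib.decompress(body[:z_len])
        assert len(decompressed) == content_len
        return z_len, content_len, decompressed

Given the gcb built in test_to_bytes above, split_gc_block(gcb.to_bytes()) would be expected to return the same content that was passed to set_content(), which is exactly the round trip the assertions check.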
405
375
 
406
376
    def test_partial_decomp(self):
407
377
        content_chunks = []
409
379
        # partial decompression to work with. Most auto-generated data
410
380
        # compresses a bit too well, we want a combination, so we combine a sha
411
381
        # hash with compressible data.
412
 
        for i in range(2048):
413
 
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
 
382
        for i in xrange(2048):
 
383
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
414
384
            content_chunks.append(next_content)
415
385
            next_sha1 = osutils.sha_string(next_content)
416
 
            content_chunks.append(next_sha1 + b'\n')
417
 
        content = b''.join(content_chunks)
 
386
            content_chunks.append(next_sha1 + '\n')
 
387
        content = ''.join(content_chunks)
418
388
        self.assertEqual(158634, len(content))
419
389
        z_content = zlib.compress(content)
420
390
        self.assertEqual(57182, len(z_content))
421
391
        block = groupcompress.GroupCompressBlock()
422
 
        block._z_content_chunks = (z_content,)
 
392
        block._z_content = z_content
423
393
        block._z_content_length = len(z_content)
424
394
        block._compressor_name = 'zlib'
425
395
        block._content_length = 158634
454
424
        # partial decompression to work with. Most auto-generated data
455
425
        # compresses a bit too well, we want a combination, so we combine a sha
456
426
        # hash with compressible data.
457
 
        for i in range(2048):
458
 
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
 
427
        for i in xrange(2048):
 
428
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
459
429
            content_chunks.append(next_content)
460
430
            next_sha1 = osutils.sha_string(next_content)
461
 
            content_chunks.append(next_sha1 + b'\n')
462
 
        content = b''.join(content_chunks)
 
431
            content_chunks.append(next_sha1 + '\n')
 
432
        content = ''.join(content_chunks)
463
433
        self.assertEqual(158634, len(content))
464
434
        z_content = zlib.compress(content)
465
435
        self.assertEqual(57182, len(z_content))
466
436
        block = groupcompress.GroupCompressBlock()
467
 
        block._z_content_chunks = (z_content,)
 
437
        block._z_content = z_content
468
438
        block._z_content_length = len(z_content)
469
439
        block._compressor_name = 'zlib'
470
440
        block._content_length = 158634
477
447
        self.assertIs(None, block._z_content_decompressor)
478
448
 
479
449
    def test__dump(self):
480
 
        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
481
 
        key_to_text = {(b'1',): dup_content + b'1 unique\n',
482
 
                       (b'2',): dup_content + b'2 extra special\n'}
 
450
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
 
451
        key_to_text = {('1',): dup_content + '1 unique\n',
 
452
                       ('2',): dup_content + '2 extra special\n'}
483
453
        locs, block = self.make_block(key_to_text)
484
 
        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
485
 
                          (b'd', 21, len(key_to_text[(b'2',)]),
486
 
                           [(b'c', 2, len(dup_content)),
487
 
                            (b'i', len(b'2 extra special\n'), b'')
488
 
                            ]),
489
 
                          ], block._dump())
 
454
        self.assertEqual([('f', len(key_to_text[('1',)])),
 
455
                          ('d', 21, len(key_to_text[('2',)]),
 
456
                           [('c', 2, len(dup_content)),
 
457
                            ('i', len('2 extra special\n'), '')
 
458
                           ]),
 
459
                         ], block._dump())
490
460
 
491
461
 
492
462
class TestCaseWithGroupCompressVersionedFiles(
497
467
        t = self.get_transport(dir)
498
468
        t.ensure_base()
499
469
        vf = groupcompress.make_pack_factory(graph=create_graph,
500
 
                                             delta=False, keylength=keylength,
501
 
                                             inconsistency_fatal=inconsistency_fatal)(t)
 
470
            delta=False, keylength=keylength,
 
471
            inconsistency_fatal=inconsistency_fatal)(t)
502
472
        if do_cleanup:
503
473
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
504
474
        return vf
517
487
 
518
488
    def make_g_index_missing_parent(self):
519
489
        graph_index = self.make_g_index('missing_parent', 1,
520
 
                                        [((b'parent', ), b'2 78 2 10', ([],)),
521
 
                                         ((b'tip', ), b'2 78 2 10',
522
 
                                            ([(b'parent', ), (b'missing-parent', )],)),
523
 
                                         ])
 
490
            [(('parent', ), '2 78 2 10', ([],)),
 
491
             (('tip', ), '2 78 2 10',
 
492
              ([('parent', ), ('missing-parent', )],)),
 
493
              ])
524
494
        return graph_index
525
495
 
526
496
    def test_get_record_stream_as_requested(self):
527
497
        # Consider promoting 'as-requested' to general availability, and
528
498
        # make this a VF interface test
529
499
        vf = self.make_test_vf(False, dir='source')
530
 
        vf.add_lines((b'a',), (), [b'lines\n'])
531
 
        vf.add_lines((b'b',), (), [b'lines\n'])
532
 
        vf.add_lines((b'c',), (), [b'lines\n'])
533
 
        vf.add_lines((b'd',), (), [b'lines\n'])
 
500
        vf.add_lines(('a',), (), ['lines\n'])
 
501
        vf.add_lines(('b',), (), ['lines\n'])
 
502
        vf.add_lines(('c',), (), ['lines\n'])
 
503
        vf.add_lines(('d',), (), ['lines\n'])
534
504
        vf.writer.end()
535
505
        keys = [record.key for record in vf.get_record_stream(
536
 
            [(b'a',), (b'b',), (b'c',), (b'd',)],
537
 
            'as-requested', False)]
538
 
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
 
506
                    [('a',), ('b',), ('c',), ('d',)],
 
507
                    'as-requested', False)]
 
508
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
539
509
        keys = [record.key for record in vf.get_record_stream(
540
 
            [(b'b',), (b'a',), (b'd',), (b'c',)],
541
 
            'as-requested', False)]
542
 
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
510
                    [('b',), ('a',), ('d',), ('c',)],
 
511
                    'as-requested', False)]
 
512
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
543
513
 
544
514
        # It should work even after being repacked into another VF
545
515
        vf2 = self.make_test_vf(False, dir='target')
546
516
        vf2.insert_record_stream(vf.get_record_stream(
547
 
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
 
517
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
548
518
        vf2.writer.end()
549
519
 
550
520
        keys = [record.key for record in vf2.get_record_stream(
551
 
            [(b'a',), (b'b',), (b'c',), (b'd',)],
552
 
            'as-requested', False)]
553
 
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
 
521
                    [('a',), ('b',), ('c',), ('d',)],
 
522
                    'as-requested', False)]
 
523
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
554
524
        keys = [record.key for record in vf2.get_record_stream(
555
 
            [(b'b',), (b'a',), (b'd',), (b'c',)],
556
 
            'as-requested', False)]
557
 
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
558
 
 
559
 
    def test_get_record_stream_max_bytes_to_index_default(self):
560
 
        vf = self.make_test_vf(True, dir='source')
561
 
        vf.add_lines((b'a',), (), [b'lines\n'])
562
 
        vf.writer.end()
563
 
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
564
 
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
565
 
                         record._manager._get_compressor_settings())
566
 
 
567
 
    def test_get_record_stream_accesses_compressor_settings(self):
568
 
        vf = self.make_test_vf(True, dir='source')
569
 
        vf.add_lines((b'a',), (), [b'lines\n'])
570
 
        vf.writer.end()
571
 
        vf._max_bytes_to_index = 1234
572
 
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
573
 
        self.assertEqual(dict(max_bytes_to_index=1234),
574
 
                         record._manager._get_compressor_settings())
575
 
 
576
 
    @staticmethod
577
 
    def grouped_stream(revision_ids, first_parents=()):
578
 
        parents = first_parents
579
 
        for revision_id in revision_ids:
580
 
            key = (revision_id,)
581
 
            record = versionedfile.FulltextContentFactory(
582
 
                key, parents, None,
583
 
                b'some content that is\n'
584
 
                b'identical except for\n'
585
 
                b'revision_id:%s\n' % (revision_id,))
586
 
            yield record
587
 
            parents = (key,)
 
525
                    [('b',), ('a',), ('d',), ('c',)],
 
526
                    'as-requested', False)]
 
527
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
588
528
 
589
529
    def test_insert_record_stream_reuses_blocks(self):
590
530
        vf = self.make_test_vf(True, dir='source')
 
531
        def grouped_stream(revision_ids, first_parents=()):
 
532
            parents = first_parents
 
533
            for revision_id in revision_ids:
 
534
                key = (revision_id,)
 
535
                record = versionedfile.FulltextContentFactory(
 
536
                    key, parents, None,
 
537
                    'some content that is\n'
 
538
                    'identical except for\n'
 
539
                    'revision_id:%s\n' % (revision_id,))
 
540
                yield record
 
541
                parents = (key,)
591
542
        # One group, a-d
592
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
543
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
593
544
        # Second group, e-h
594
 
        vf.insert_record_stream(self.grouped_stream(
595
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
545
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
546
                                               first_parents=(('d',),)))
596
547
        block_bytes = {}
597
 
        stream = vf.get_record_stream(
598
 
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
 
548
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
 
549
                                      'unordered', False)
599
550
        num_records = 0
600
551
        for record in stream:
601
 
            if record.key in [(b'a',), (b'e',)]:
 
552
            if record.key in [('a',), ('e',)]:
602
553
                self.assertEqual('groupcompress-block', record.storage_kind)
603
554
            else:
604
555
                self.assertEqual('groupcompress-block-ref',
607
558
            num_records += 1
608
559
        self.assertEqual(8, num_records)
609
560
        for r in 'abcd':
610
 
            key = (r.encode(),)
611
 
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
612
 
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
 
561
            key = (r,)
 
562
            self.assertIs(block_bytes[key], block_bytes[('a',)])
 
563
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
613
564
        for r in 'efgh':
614
 
            key = (r.encode(),)
615
 
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
616
 
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
 
565
            key = (r,)
 
566
            self.assertIs(block_bytes[key], block_bytes[('e',)])
 
567
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
617
568
        # Now copy the blocks into another vf, and ensure that the blocks are
618
569
        # preserved without creating new entries
619
570
        vf2 = self.make_test_vf(True, dir='target')
620
 
        keys = [(r.encode(),) for r in 'abcdefgh']
621
571
        # ordering in 'groupcompress' order, should actually swap the groups in
622
572
        # the target vf, but the groups themselves should not be disturbed.
623
 
 
624
573
        def small_size_stream():
625
 
            for record in vf.get_record_stream(keys, 'groupcompress', False):
 
574
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
 
575
                                               'groupcompress', False):
626
576
                record._manager._full_enough_block_size = \
627
577
                    record._manager._block._content_length
628
578
                yield record
629
 
 
 
579
                        
630
580
        vf2.insert_record_stream(small_size_stream())
631
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
581
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
582
                                       'groupcompress', False)
632
583
        vf2.writer.end()
633
584
        num_records = 0
634
585
        for record in stream:
639
590
 
640
591
    def test_insert_record_stream_packs_on_the_fly(self):
641
592
        vf = self.make_test_vf(True, dir='source')
 
593
        def grouped_stream(revision_ids, first_parents=()):
 
594
            parents = first_parents
 
595
            for revision_id in revision_ids:
 
596
                key = (revision_id,)
 
597
                record = versionedfile.FulltextContentFactory(
 
598
                    key, parents, None,
 
599
                    'some content that is\n'
 
600
                    'identical except for\n'
 
601
                    'revision_id:%s\n' % (revision_id,))
 
602
                yield record
 
603
                parents = (key,)
642
604
        # One group, a-d
643
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
605
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
644
606
        # Second group, e-h
645
 
        vf.insert_record_stream(self.grouped_stream(
646
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
607
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
608
                                               first_parents=(('d',),)))
647
609
        # Now copy the blocks into another vf, and see that the
648
610
        # insert_record_stream rebuilt a new block on-the-fly because of
649
611
        # under-utilization
650
612
        vf2 = self.make_test_vf(True, dir='target')
651
 
        keys = [(r.encode(),) for r in 'abcdefgh']
652
613
        vf2.insert_record_stream(vf.get_record_stream(
653
 
            keys, 'groupcompress', False))
654
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
614
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
 
615
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
616
                                       'groupcompress', False)
655
617
        vf2.writer.end()
656
618
        num_records = 0
657
619
        # All of the records should be recombined into a single block
666
628
 
667
629
    def test__insert_record_stream_no_reuse_block(self):
668
630
        vf = self.make_test_vf(True, dir='source')
 
631
        def grouped_stream(revision_ids, first_parents=()):
 
632
            parents = first_parents
 
633
            for revision_id in revision_ids:
 
634
                key = (revision_id,)
 
635
                record = versionedfile.FulltextContentFactory(
 
636
                    key, parents, None,
 
637
                    'some content that is\n'
 
638
                    'identical except for\n'
 
639
                    'revision_id:%s\n' % (revision_id,))
 
640
                yield record
 
641
                parents = (key,)
669
642
        # One group, a-d
670
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
643
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
671
644
        # Second group, e-h
672
 
        vf.insert_record_stream(self.grouped_stream(
673
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
645
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
646
                                               first_parents=(('d',),)))
674
647
        vf.writer.end()
675
 
        keys = [(r.encode(),) for r in 'abcdefgh']
676
 
        self.assertEqual(8, len(list(
677
 
            vf.get_record_stream(keys, 'unordered', False))))
 
648
        self.assertEqual(8, len(list(vf.get_record_stream(
 
649
                                        [(r,) for r in 'abcdefgh'],
 
650
                                        'unordered', False))))
678
651
        # Now copy the blocks into another vf, and ensure that the blocks are
679
652
        # preserved without creating new entries
680
653
        vf2 = self.make_test_vf(True, dir='target')
681
654
        # ordering in 'groupcompress' order, should actually swap the groups in
682
655
        # the target vf, but the groups themselves should not be disturbed.
683
656
        list(vf2._insert_record_stream(vf.get_record_stream(
684
 
            keys, 'groupcompress', False),
 
657
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
685
658
            reuse_blocks=False))
686
659
        vf2.writer.end()
687
660
        # After inserting with reuse_blocks=False, we should have everything in
688
661
        # a single new block.
689
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
662
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
663
                                       'groupcompress', False)
690
664
        block = None
691
665
        for record in stream:
692
666
            if block is None:
698
672
        unvalidated = self.make_g_index_missing_parent()
699
673
        combined = _mod_index.CombinedGraphIndex([unvalidated])
700
674
        index = groupcompress._GCGraphIndex(combined,
701
 
                                            is_locked=lambda: True, parents=True,
702
 
                                            track_external_parent_refs=True)
 
675
            is_locked=lambda: True, parents=True,
 
676
            track_external_parent_refs=True)
703
677
        index.scan_unvalidated_index(unvalidated)
704
678
        self.assertEqual(
705
 
            frozenset([(b'missing-parent',)]), index.get_missing_parents())
 
679
            frozenset([('missing-parent',)]), index.get_missing_parents())
706
680
 
707
681
    def test_track_external_parent_refs(self):
708
682
        g_index = self.make_g_index('empty', 1, [])
709
683
        mod_index = btree_index.BTreeBuilder(1, 1)
710
684
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
711
685
        index = groupcompress._GCGraphIndex(combined,
712
 
                                            is_locked=lambda: True, parents=True,
713
 
                                            add_callback=mod_index.add_nodes,
714
 
                                            track_external_parent_refs=True)
 
686
            is_locked=lambda: True, parents=True,
 
687
            add_callback=mod_index.add_nodes,
 
688
            track_external_parent_refs=True)
715
689
        index.add_records([
716
 
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
 
690
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
717
691
        self.assertEqual(
718
 
            frozenset([(b'parent-1',), (b'parent-2',)]),
 
692
            frozenset([('parent-1',), ('parent-2',)]),
719
693
            index.get_missing_parents())
720
694
 
721
695
    def make_source_with_b(self, a_parent, path):
722
696
        source = self.make_test_vf(True, dir=path)
723
 
        source.add_lines((b'a',), (), [b'lines\n'])
 
697
        source.add_lines(('a',), (), ['lines\n'])
724
698
        if a_parent:
725
 
            b_parents = ((b'a',),)
 
699
            b_parents = (('a',),)
726
700
        else:
727
701
            b_parents = ()
728
 
        source.add_lines((b'b',), b_parents, [b'lines\n'])
 
702
        source.add_lines(('b',), b_parents, ['lines\n'])
729
703
        return source
730
704
 
731
705
    def do_inconsistent_inserts(self, inconsistency_fatal):
732
706
        target = self.make_test_vf(True, dir='target',
733
707
                                   inconsistency_fatal=inconsistency_fatal)
734
708
        for x in range(2):
735
 
            source = self.make_source_with_b(x == 1, 'source%s' % x)
 
709
            source = self.make_source_with_b(x==1, 'source%s' % x)
736
710
            target.insert_record_stream(source.get_record_stream(
737
 
                [(b'b',)], 'unordered', False))
 
711
                [('b',)], 'unordered', False))
738
712
 
739
713
    def test_inconsistent_redundant_inserts_warn(self):
740
714
        """Should not insert a record that is already present."""
741
715
        warnings = []
742
 
 
743
716
        def warning(template, args):
744
717
            warnings.append(template % args)
745
718
        _trace_warning = trace.warning
748
721
            self.do_inconsistent_inserts(inconsistency_fatal=False)
749
722
        finally:
750
723
            trace.warning = _trace_warning
751
 
        self.assertContainsRe(
752
 
            "\n".join(warnings),
753
 
            r"^inconsistent details in skipped record: \(b?'b',\)"
754
 
            r" \(b?'42 32 0 8', \(\(\),\)\)"
755
 
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
 
724
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
 
725
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
 
726
                         warnings)
756
727
 
757
728
    def test_inconsistent_redundant_inserts_raises(self):
758
 
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
 
729
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
759
730
                              inconsistency_fatal=True)
760
 
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
761
 
                              r" in add_records:"
762
 
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
763
 
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
 
731
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
 
732
                              " in add_records:"
 
733
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
 
734
                              " 0 8', \(\(\('a',\),\),\)\)")
764
735
 
765
736
    def test_clear_cache(self):
766
737
        vf = self.make_source_with_b(True, 'source')
767
738
        vf.writer.end()
768
 
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
 
739
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
769
740
                                           True):
770
741
            pass
771
742
        self.assertTrue(len(vf._group_cache) > 0)
773
744
        self.assertEqual(0, len(vf._group_cache))
774
745
 
775
746
 
776
 
class TestGroupCompressConfig(tests.TestCaseWithTransport):
777
 
 
778
 
    def make_test_vf(self):
779
 
        t = self.get_transport('.')
780
 
        t.ensure_base()
781
 
        factory = groupcompress.make_pack_factory(graph=True,
782
 
                                                  delta=False, keylength=1, inconsistency_fatal=True)
783
 
        vf = factory(t)
784
 
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
785
 
        return vf
786
 
 
787
 
    def test_max_bytes_to_index_default(self):
788
 
        vf = self.make_test_vf()
789
 
        gc = vf._make_group_compressor()
790
 
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
791
 
                         vf._max_bytes_to_index)
792
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
793
 
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
794
 
                             gc._delta_index._max_bytes_to_index)
795
 
 
796
 
    def test_max_bytes_to_index_in_config(self):
797
 
        c = config.GlobalConfig()
798
 
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
799
 
        vf = self.make_test_vf()
800
 
        gc = vf._make_group_compressor()
801
 
        self.assertEqual(10000, vf._max_bytes_to_index)
802
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
803
 
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
804
 
 
805
 
    def test_max_bytes_to_index_bad_config(self):
806
 
        c = config.GlobalConfig()
807
 
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
808
 
        vf = self.make_test_vf()
809
 
        # TODO: This is triggering a warning, we might want to trap and make
810
 
        #       sure it is readable.
811
 
        gc = vf._make_group_compressor()
812
 
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
813
 
                         vf._max_bytes_to_index)
814
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
815
 
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
816
 
                             gc._delta_index._max_bytes_to_index)
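These added tests drive the bzr.groupcompress.max_bytes_to_index option through config.GlobalConfig. The same option can be set in the user's global configuration file; a minimal example, assuming the usual [DEFAULT] section and config location (~/.bazaar/bazaar.conf for bzr, ~/.config/breezy/breezy.conf for brz), neither of which comes from this diff:

    [DEFAULT]
    bzr.groupcompress.max_bytes_to_index = 10000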
817
 
 
818
747
 
819
748
class StubGCVF(object):
820
749
    def __init__(self, canned_get_blocks=None):
821
750
        self._group_cache = {}
822
751
        self._canned_get_blocks = canned_get_blocks or []
823
 
 
824
752
    def _get_blocks(self, read_memos):
825
753
        return iter(self._canned_get_blocks)
826
 
 
 
754
    
827
755
 
828
756
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
829
757
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
830
 
 
 
758
    
831
759
    def test_add_key_new_read_memo(self):
832
760
        """Adding a key with an uncached read_memo new to this batch adds that
833
761
        read_memo to the list of memos to fetch.
891
819
                (read_memo1, groupcompress.GroupCompressBlock()),
892
820
                (read_memo2, groupcompress.GroupCompressBlock())])
893
821
        locations = {
894
 
            ('key1',): (read_memo1 + (0, 0), None, None, None),
895
 
            ('key2',): (read_memo2 + (0, 0), None, None, None)}
 
822
            ('key1',): (read_memo1 + (None, None), None, None, None),
 
823
            ('key2',): (read_memo2 + (None, None), None, None, None)}
896
824
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
897
825
        batcher.add_key(('key1',))
898
826
        batcher.add_key(('key2',))
912
840
        gcvf = StubGCVF()
913
841
        gcvf._group_cache[read_memo] = fake_block
914
842
        locations = {
915
 
            ('key',): (read_memo + (0, 0), None, None, None)}
 
843
            ('key',): (read_memo + (None, None), None, None, None)}
916
844
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
917
845
        batcher.add_key(('key',))
918
846
        self.assertEqual([], list(batcher.yield_factories()))
925
853
class TestLazyGroupCompress(tests.TestCaseWithTransport):
926
854
 
927
855
    _texts = {
928
 
        (b'key1',): b"this is a text\n"
929
 
        b"with a reasonable amount of compressible bytes\n"
930
 
        b"which can be shared between various other texts\n",
931
 
        (b'key2',): b"another text\n"
932
 
        b"with a reasonable amount of compressible bytes\n"
933
 
        b"which can be shared between various other texts\n",
934
 
        (b'key3',): b"yet another text which won't be extracted\n"
935
 
        b"with a reasonable amount of compressible bytes\n"
936
 
        b"which can be shared between various other texts\n",
937
 
        (b'key4',): b"this will be extracted\n"
938
 
        b"but references most of its bytes from\n"
939
 
        b"yet another text which won't be extracted\n"
940
 
        b"with a reasonable amount of compressible bytes\n"
941
 
        b"which can be shared between various other texts\n",
 
856
        ('key1',): "this is a text\n"
 
857
                   "with a reasonable amount of compressible bytes\n"
 
858
                   "which can be shared between various other texts\n",
 
859
        ('key2',): "another text\n"
 
860
                   "with a reasonable amount of compressible bytes\n"
 
861
                   "which can be shared between various other texts\n",
 
862
        ('key3',): "yet another text which won't be extracted\n"
 
863
                   "with a reasonable amount of compressible bytes\n"
 
864
                   "which can be shared between various other texts\n",
 
865
        ('key4',): "this will be extracted\n"
 
866
                   "but references most of its bytes from\n"
 
867
                   "yet another text which won't be extracted\n"
 
868
                   "with a reasonable amount of compressible bytes\n"
 
869
                   "which can be shared between various other texts\n",
942
870
    }
943
 
 
944
871
    def make_block(self, key_to_text):
945
872
        """Create a GroupCompressBlock, filling it with the given texts."""
946
873
        compressor = groupcompress.GroupCompressor()
948
875
        for key in sorted(key_to_text):
949
876
            compressor.compress(key, key_to_text[key], None)
950
877
        locs = dict((key, (start, end)) for key, (start, _, end, _)
951
 
                    in compressor.labels_deltas.items())
 
878
                    in compressor.labels_deltas.iteritems())
952
879
        block = compressor.flush()
953
880
        raw_bytes = block.to_bytes()
954
881
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
967
894
    def test_get_fulltexts(self):
968
895
        locations, block = self.make_block(self._texts)
969
896
        manager = groupcompress._LazyGroupContentManager(block)
970
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
971
 
        self.add_key_to_manager((b'key2',), locations, block, manager)
 
897
        self.add_key_to_manager(('key1',), locations, block, manager)
 
898
        self.add_key_to_manager(('key2',), locations, block, manager)
972
899
        result_order = []
973
900
        for record in manager.get_record_stream():
974
901
            result_order.append(record.key)
975
902
            text = self._texts[record.key]
976
903
            self.assertEqual(text, record.get_bytes_as('fulltext'))
977
 
        self.assertEqual([(b'key1',), (b'key2',)], result_order)
 
904
        self.assertEqual([('key1',), ('key2',)], result_order)
978
905
 
979
906
        # If we build the manager in the opposite order, we should get them
980
907
        # back in the opposite order
981
908
        manager = groupcompress._LazyGroupContentManager(block)
982
 
        self.add_key_to_manager((b'key2',), locations, block, manager)
983
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
909
        self.add_key_to_manager(('key2',), locations, block, manager)
 
910
        self.add_key_to_manager(('key1',), locations, block, manager)
984
911
        result_order = []
985
912
        for record in manager.get_record_stream():
986
913
            result_order.append(record.key)
987
914
            text = self._texts[record.key]
988
915
            self.assertEqual(text, record.get_bytes_as('fulltext'))
989
 
        self.assertEqual([(b'key2',), (b'key1',)], result_order)
 
916
        self.assertEqual([('key2',), ('key1',)], result_order)
990
917
 
991
918
    def test__wire_bytes_no_keys(self):
992
919
        locations, block = self.make_block(self._texts)
996
923
        # We should have triggered a strip, since we aren't using any content
997
924
        stripped_block = manager._block.to_bytes()
998
925
        self.assertTrue(block_length > len(stripped_block))
999
 
        empty_z_header = zlib.compress(b'')
1000
 
        self.assertEqual(b'groupcompress-block\n'
1001
 
                         b'8\n'  # len(compress(''))
1002
 
                         b'0\n'  # len('')
1003
 
                         b'%d\n'  # compressed block len
1004
 
                         b'%s'  # zheader
1005
 
                         b'%s'  # block
 
926
        empty_z_header = zlib.compress('')
 
927
        self.assertEqual('groupcompress-block\n'
 
928
                         '8\n' # len(compress(''))
 
929
                         '0\n' # len('')
 
930
                         '%d\n'# compressed block len
 
931
                         '%s'  # zheader
 
932
                         '%s'  # block
1006
933
                         % (len(stripped_block), empty_z_header,
1007
934
                            stripped_block),
1008
935
                         wire_bytes)
1010
937
    def test__wire_bytes(self):
1011
938
        locations, block = self.make_block(self._texts)
1012
939
        manager = groupcompress._LazyGroupContentManager(block)
1013
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
1014
 
        self.add_key_to_manager((b'key4',), locations, block, manager)
 
940
        self.add_key_to_manager(('key1',), locations, block, manager)
 
941
        self.add_key_to_manager(('key4',), locations, block, manager)
1015
942
        block_bytes = block.to_bytes()
1016
943
        wire_bytes = manager._wire_bytes()
1017
944
        (storage_kind, z_header_len, header_len,
1018
 
         block_len, rest) = wire_bytes.split(b'\n', 4)
 
945
         block_len, rest) = wire_bytes.split('\n', 4)
1019
946
        z_header_len = int(z_header_len)
1020
947
        header_len = int(header_len)
1021
948
        block_len = int(block_len)
1022
 
        self.assertEqual(b'groupcompress-block', storage_kind)
 
949
        self.assertEqual('groupcompress-block', storage_kind)
1023
950
        self.assertEqual(34, z_header_len)
1024
951
        self.assertEqual(26, header_len)
1025
952
        self.assertEqual(len(block_bytes), block_len)
1026
953
        z_header = rest[:z_header_len]
1027
954
        header = zlib.decompress(z_header)
1028
955
        self.assertEqual(header_len, len(header))
1029
 
        entry1 = locations[(b'key1',)]
1030
 
        entry4 = locations[(b'key4',)]
1031
 
        self.assertEqualDiff(b'key1\n'
1032
 
                             b'\n'  # no parents
1033
 
                             b'%d\n'  # start offset
1034
 
                             b'%d\n'  # end offset
1035
 
                             b'key4\n'
1036
 
                             b'\n'
1037
 
                             b'%d\n'
1038
 
                             b'%d\n'
 
956
        entry1 = locations[('key1',)]
 
957
        entry4 = locations[('key4',)]
 
958
        self.assertEqualDiff('key1\n'
 
959
                             '\n'  # no parents
 
960
                             '%d\n' # start offset
 
961
                             '%d\n' # end offset
 
962
                             'key4\n'
 
963
                             '\n'
 
964
                             '%d\n'
 
965
                             '%d\n'
1039
966
                             % (entry1[0], entry1[1],
1040
967
                                entry4[0], entry4[1]),
1041
 
                             header)
 
968
                            header)
1042
969
        z_block = rest[z_header_len:]
1043
970
        self.assertEqual(block_bytes, z_block)
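
The two _wire_bytes tests above pin down the 'groupcompress-block' wire form: a storage-kind line, three decimal length lines, a zlib-compressed header, then the raw block. A parsing sketch that assumes nothing beyond what those assertions spell out (the tests only exercise single-element keys):

import zlib


def parse_groupcompress_block(wire_bytes):
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split(b'\n', 4)
    if storage_kind != b'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % (storage_kind,))
    z_header_len = int(z_header_len)
    header_len = int(header_len)
    block_len = int(block_len)
    z_header, z_block = rest[:z_header_len], rest[z_header_len:]
    header = zlib.decompress(z_header)
    if len(header) != header_len or len(z_block) != block_len:
        raise ValueError('length prefixes do not match the payload')
    # The header is four lines per requested key:
    # key, parents ('' when there are none), start offset, end offset.
    lines = header.split(b'\n')
    entries = []
    for i in range(0, len(lines) - 1, 4):
        key, parents, start, end = lines[i:i + 4]
        entries.append((key, parents, int(start), int(end)))
    return entries, z_block
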
1044
971
 
1045
972
    def test_from_bytes(self):
1046
973
        locations, block = self.make_block(self._texts)
1047
974
        manager = groupcompress._LazyGroupContentManager(block)
1048
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
1049
 
        self.add_key_to_manager((b'key4',), locations, block, manager)
 
975
        self.add_key_to_manager(('key1',), locations, block, manager)
 
976
        self.add_key_to_manager(('key4',), locations, block, manager)
1050
977
        wire_bytes = manager._wire_bytes()
1051
 
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
 
978
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
1052
979
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
1053
980
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
1054
981
        self.assertEqual(2, len(manager._factories))
1058
985
            result_order.append(record.key)
1059
986
            text = self._texts[record.key]
1060
987
            self.assertEqual(text, record.get_bytes_as('fulltext'))
1061
 
        self.assertEqual([(b'key1',), (b'key4',)], result_order)
 
988
        self.assertEqual([('key1',), ('key4',)], result_order)
1062
989
 
1063
990
    def test__check_rebuild_no_changes(self):
1064
991
        block, manager = self.make_block_and_full_manager(self._texts)
1069
996
        locations, block = self.make_block(self._texts)
1070
997
        manager = groupcompress._LazyGroupContentManager(block)
1071
998
        # Request just the first key, which should trigger a 'strip' action
1072
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
999
        self.add_key_to_manager(('key1',), locations, block, manager)
1073
1000
        manager._check_rebuild_block()
1074
1001
        self.assertIsNot(block, manager._block)
1075
1002
        self.assertTrue(block._content_length > manager._block._content_length)
1076
1003
        # We should still be able to get the content out of this block, though
1077
1004
        # it should only have 1 entry
1078
1005
        for record in manager.get_record_stream():
1079
 
            self.assertEqual((b'key1',), record.key)
 
1006
            self.assertEqual(('key1',), record.key)
1080
1007
            self.assertEqual(self._texts[record.key],
1081
1008
                             record.get_bytes_as('fulltext'))
1082
1009
 
1084
1011
        locations, block = self.make_block(self._texts)
1085
1012
        manager = groupcompress._LazyGroupContentManager(block)
1086
1013
        # Requesting a small key in the middle should trigger a 'rebuild'
1087
 
        self.add_key_to_manager((b'key4',), locations, block, manager)
 
1014
        self.add_key_to_manager(('key4',), locations, block, manager)
1088
1015
        manager._check_rebuild_block()
1089
1016
        self.assertIsNot(block, manager._block)
1090
1017
        self.assertTrue(block._content_length > manager._block._content_length)
1091
1018
        for record in manager.get_record_stream():
1092
 
            self.assertEqual((b'key4',), record.key)
 
1019
            self.assertEqual(('key4',), record.key)
1093
1020
            self.assertEqual(self._texts[record.key],
1094
1021
                             record.get_bytes_as('fulltext'))
1095
1022
 
1096
 
    def test_manager_default_compressor_settings(self):
1097
 
        locations, old_block = self.make_block(self._texts)
1098
 
        manager = groupcompress._LazyGroupContentManager(old_block)
1099
 
        gcvf = groupcompress.GroupCompressVersionedFiles
1100
 
        # It doesn't greedily evaluate the compressor settings
1101
 
        self.assertIs(None, manager._compressor_settings)
1102
 
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
1103
 
                         manager._get_compressor_settings())
1104
 
 
1105
 
    def test_manager_custom_compressor_settings(self):
1106
 
        locations, old_block = self.make_block(self._texts)
1107
 
        called = []
1108
 
 
1109
 
        def compressor_settings():
1110
 
            called.append('called')
1111
 
            return (10,)
1112
 
        manager = groupcompress._LazyGroupContentManager(old_block,
1113
 
                                                         get_compressor_settings=compressor_settings)
1114
 
        gcvf = groupcompress.GroupCompressVersionedFiles
1115
 
        # It doesn't greedily evaluate compressor_settings
1116
 
        self.assertIs(None, manager._compressor_settings)
1117
 
        self.assertEqual((10,), manager._get_compressor_settings())
1118
 
        self.assertEqual((10,), manager._get_compressor_settings())
1119
 
        self.assertEqual((10,), manager._compressor_settings)
1120
 
        # Only called 1 time
1121
 
        self.assertEqual(['called'], called)
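
The two settings tests above check that the manager treats the settings callback lazily and memoises its result. A generic sketch of that pattern, purely illustrative rather than the bzrlib implementation (the default value is a stand-in):

class LazySettings(object):
    """Evaluate a zero-argument settings callback at most once, on demand."""

    _default_settings = (4096,)  # stand-in for _DEFAULT_COMPRESSOR_SETTINGS

    def __init__(self, get_settings=None):
        self._get_settings = get_settings
        self._settings = None          # nothing evaluated at construction

    def settings(self):
        if self._settings is None:
            if self._get_settings is None:
                self._settings = self._default_settings
            else:
                self._settings = self._get_settings()
        return self._settings


called = []
lazy = LazySettings(lambda: called.append('called') or (10,))
assert lazy._settings is None      # not evaluated greedily
assert lazy.settings() == (10,)
assert lazy.settings() == (10,)
assert called == ['called']        # the callback ran exactly once
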
1122
 
 
1123
 
    def test__rebuild_handles_compressor_settings(self):
1124
 
        if not issubclass(groupcompress.GroupCompressor,
1125
 
                          groupcompress.PyrexGroupCompressor):
1126
 
            raise tests.TestNotApplicable('pure-python compressor'
1127
 
                                          ' does not handle compressor_settings')
1128
 
        locations, old_block = self.make_block(self._texts)
1129
 
        manager = groupcompress._LazyGroupContentManager(old_block,
1130
 
                                                         get_compressor_settings=lambda: dict(max_bytes_to_index=32))
1131
 
        gc = manager._make_group_compressor()
1132
 
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
1133
 
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
1134
 
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
1135
 
        action, last_byte, total_bytes = manager._check_rebuild_action()
1136
 
        self.assertEqual('rebuild', action)
1137
 
        manager._rebuild_block()
1138
 
        new_block = manager._block
1139
 
        self.assertIsNot(old_block, new_block)
1140
 
        # Because of the new max_bytes_to_index, we do a poor job of
1141
 
        # rebuilding. This is a side-effect of the change, but at least it does
1142
 
        # show the setting had an effect.
1143
 
        self.assertTrue(old_block._content_length < new_block._content_length)
1144
 
 
1145
1023
    def test_check_is_well_utilized_all_keys(self):
1146
1024
        block, manager = self.make_block_and_full_manager(self._texts)
1147
1025
        self.assertFalse(manager.check_is_well_utilized())
1158
1036
 
1159
1037
    def test_check_is_well_utilized_mixed_keys(self):
1160
1038
        texts = {}
1161
 
        f1k1 = (b'f1', b'k1')
1162
 
        f1k2 = (b'f1', b'k2')
1163
 
        f2k1 = (b'f2', b'k1')
1164
 
        f2k2 = (b'f2', b'k2')
1165
 
        texts[f1k1] = self._texts[(b'key1',)]
1166
 
        texts[f1k2] = self._texts[(b'key2',)]
1167
 
        texts[f2k1] = self._texts[(b'key3',)]
1168
 
        texts[f2k2] = self._texts[(b'key4',)]
 
1039
        f1k1 = ('f1', 'k1')
 
1040
        f1k2 = ('f1', 'k2')
 
1041
        f2k1 = ('f2', 'k1')
 
1042
        f2k2 = ('f2', 'k2')
 
1043
        texts[f1k1] = self._texts[('key1',)]
 
1044
        texts[f1k2] = self._texts[('key2',)]
 
1045
        texts[f2k1] = self._texts[('key3',)]
 
1046
        texts[f2k2] = self._texts[('key4',)]
1169
1047
        block, manager = self.make_block_and_full_manager(texts)
1170
1048
        self.assertFalse(manager.check_is_well_utilized())
1171
1049
        manager._full_enough_block_size = block._content_length
1179
1057
        locations, block = self.make_block(self._texts)
1180
1058
        manager = groupcompress._LazyGroupContentManager(block)
1181
1059
        manager._full_enough_block_size = block._content_length
1182
 
        self.add_key_to_manager((b'key1',), locations, block, manager)
1183
 
        self.add_key_to_manager((b'key2',), locations, block, manager)
 
1060
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1061
        self.add_key_to_manager(('key2',), locations, block, manager)
1184
1062
        # Just using the content from key1 and 2 is not enough to be considered
1185
1063
        # 'complete'
1186
1064
        self.assertFalse(manager.check_is_well_utilized())
1187
1065
        # However if we add key4, then we have enough, as we only require 75%
1188
1066
        # consumption
1189
 
        self.add_key_to_manager((b'key4',), locations, block, manager)
 
1067
        self.add_key_to_manager(('key4',), locations, block, manager)
1190
1068
        self.assertTrue(manager.check_is_well_utilized())
1191
 
 
1192
 
 
1193
 
class Test_GCBuildDetails(tests.TestCase):
1194
 
 
1195
 
    def test_acts_like_tuple(self):
1196
 
        # _GCBuildDetails inlines some of the data that used to be spread out
1197
 
        # across a bunch of tuples
1198
 
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1199
 
                                           ('INDEX', 10, 20, 0, 5))
1200
 
        self.assertEqual(4, len(bd))
1201
 
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
1202
 
        self.assertEqual(None, bd[1])  # Compression Parent is always None
1203
 
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
1204
 
        self.assertEqual(('group', None), bd[3])  # Record details
1205
 
 
1206
 
    def test__repr__(self):
1207
 
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1208
 
                                           ('INDEX', 10, 20, 0, 5))
1209
 
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
1210
 
                         " (('parent1',), ('parent2',)))",
1211
 
                         repr(bd))
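
The assertions above describe an object that answers like the 4-tuple (index_memo, compression_parent, parents, record_details) without storing one. A sketch of that shape, based only on what these tests check (the real _GCBuildDetails lives in groupcompress.py and may differ in detail):

class BuildDetailsSketch(object):
    """Tuple-lookalike in the spirit of _GCBuildDetails (illustrative only)."""

    __slots__ = ('_parents', '_index_memo')

    def __init__(self, parents, index_memo):
        self._parents = parents
        self._index_memo = index_memo

    def __len__(self):
        return 4

    def __getitem__(self, offset):
        # Emulates (index_memo, None, parents, ('group', None))[offset].
        if offset == 0:
            return self._index_memo
        elif offset == 1:
            return None   # compression parent is always None for groupcompress
        elif offset == 2:
            return self._parents
        elif offset == 3:
            return ('group', None)
        raise IndexError(offset)

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__,
                               self._index_memo, self._parents)
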