# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    errors,
    groupcompress,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
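        # A note on what is being checked (as I read the groupcompress
        # record layout): 'f' marks a fulltext record (vs. 'd' for a
        # delta), followed by the content length as a variable-width
        # integer -- 0x0f == 15 == len('strange\ncommon\n').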
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
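        # A note on the delta encoding checked below (my reading of the
        # groupcompress format, which resembles git's pack deltas): a
        # command byte with the high bit set is a copy -- 0x91 == 0x80 |
        # 0x01 | 0x10 says one offset byte and one length byte follow --
        # while a command byte below 0x80 inserts the next N bytes
        # literally.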
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
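        # Unlike the line-based Python compressor, the C compressor appears
        # to match on 16-byte windows, so it inserts just 'new' (no
        # trailing newline) and starts its copy commands mid-line.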
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
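        # The pure-Python compressor works line by line (compare the Pyrex
        # variant above): it inserts the whole 'new\n' line and its copy
        # ranges fall on line boundaries.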
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure _ensure_content() is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

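    # The block layout exercised below: a 'gcb1z\n' marker (group compress
    # block v1, zlib), the compressed and uncompressed lengths as decimal
    # lines, then the zlib-compressed content itself.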
    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we want a combination: we mix sha
        # hashes in with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # making this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

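    # The next two tests pin down block reuse on fetch: streamed records
    # advertise 'groupcompress-block' for the first record out of a block
    # and 'groupcompress-block-ref' for the rest, and inserting such a
    # stream should keep the original compressed blocks intact unless
    # reuse_blocks=False forces the texts into new blocks.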
    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the texts into another vf with block reuse disabled; the
        # texts should be recompressed rather than passed through as blocks
        vf2 = self.make_test_vf(True, dir='target')
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

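    # _LazyGroupContentManager wraps a block plus one factory per requested
    # key, deferring extraction until records are actually consumed; the
    # tests below also exercise its strip/rebuild behaviour when only part
    # of a block is wanted.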
    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))