
To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Aaron Bentley
  • Date: 2009-06-26 17:24:01 UTC
  • mto: This revision was merged to the branch mainline in revision 4490.
  • Revision ID: aaron@aaronbentley.com-20090626172401-ykvzf3ifxoi5nvvj
Make inconsistency in skipped records fatal for revisions.

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor."""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
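
    # A minimal sketch of the record framing the assertions above rely on
    # (inferred from the expected bytes, not from the compressor sources):
    # each record in a group starts with a one-byte kind marker, 'f' for a
    # fulltext or 'd' for a delta, followed by the content length as a
    # base-128 varint -- a single byte '\x0f' here, since the fulltext
    # 'strange\ncommon\n' is only 15 bytes long and 15 < 128.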
 
    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
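
    # A rough decoding of the delta bytes checked above, assuming the
    # git-style copy/insert commands that these expected strings imply:
    #   'd'            -> this record is a delta
    #   '\x0f'         -> the delta body is 15 bytes long
    #   '\x36'         -> the reconstructed target text is 0x36 == 54 bytes
    #   '\x91\x0a\x2c' -> a command byte with the high bit set is a copy;
    #                     0x91 == 0b10010001, so one offset byte (0x0a) and
    #                     one length byte (0x2c) follow: copy 44 bytes from
    #                     offset 10 of the group content so far
    #   '\x0adifferent\n' -> a command byte below 0x80 is an insert: emit
    #                     the next 10 bytes literally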
 
    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
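
    # Note that the compiled and Python compressors pick slightly different
    # but equivalent encodings for the same target text: here the Pyrex
    # version inserts 'new' (3 bytes) and starts its first copy one byte
    # earlier (offset 0x09) so it picks up the preceding '\n', while
    # TestPythonGroupCompressor.test_three_nosha_delta below expects an
    # insert of 'new\n' (4 bytes) and a copy from offset 0x0a. Both decode
    # to the same 0x5f target bytes.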
 

class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
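
    # For reference, the round-trips in the two tests above pin down the
    # block wire format as just a header plus the zlib stream:
    #
    #   'gcb1z\n' + '%d\n' % z_content_length + '%d\n' % content_length
    #             + z_content
    #
    # where 'gcb1z' reads as "group compress block, format 1, zlib".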
 
    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a mix, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
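
    # The assertions above capture the block-sharing contract: the first
    # record streamed out of a given compression block has storage_kind
    # 'groupcompress-block' and carries the block's bytes, while later
    # records from the same block are 'groupcompress-block-ref' and just
    # point back at it, so each block crosses the stream only once.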
 
    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))
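
    # do_inconsistent_inserts adds the same key ('b',) twice with different
    # index details (first with no parents, then with ('a',) as a parent).
    # The two tests below pin down both behaviours: with
    # inconsistency_fatal=False the mismatch in the skipped record is only
    # warned about, while with inconsistency_fatal=True it raises
    # KnitCorrupt -- the behaviour this revision makes fatal for revisions.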
 
    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
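
    # Taken together, the two _wire_bytes tests spell out the lazy-manager
    # serialization: a 'groupcompress-block\n' marker, three decimal length
    # lines (compressed header, uncompressed header, block), a
    # zlib-compressed header made of '<key>\n<parents>\n<start>\n<end>\n'
    # entries, and then the block bytes themselves.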
 
    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))