/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2009-06-26 03:29:57 UTC
  • mto: This revision was merged to the branch mainline in revision 4484.
  • Revision ID: mbp@sourcefrog.net-20090626032957-oe91zyznb3aztquu
(mbp) only show transport activity when progress is already visible

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
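
# multiply_tests runs each TestAllGroupCompressors test once per scenario, so
# self.compressor is bound to the pure-Python implementation and, when the
# compiled extension is available, to the Pyrex implementation as well.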


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
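
    # A note on the bytes checked above, based on the assertions in this
    # file: each record in a group starts with a one-byte kind marker ('f'
    # for fulltext, 'd' for delta) followed by the length of the record body
    # ('\x0f' is 15, the length of 'strange\ncommon\n').  compress() returns
    # (sha1, start_point, end_point, kind), with start/end being byte offsets
    # of the record within the uncompressed group.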

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
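
    # Reading the delta bytes above: 'd' marks a delta record, '\x0f' is the
    # length of the delta body (15 bytes), and within the body '\x36' (54) is
    # the length of the target text.  The instructions appear to follow a
    # git-style binary delta scheme: a command byte with the high bit set is
    # a copy (0x91 says one offset byte and one length byte follow, i.e. copy
    # 0x2c bytes from offset 0x0a of the group content so far), while a
    # command byte below 0x80 inserts that many literal bytes ('\x0a'
    # followed by the ten bytes of 'different\n').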

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
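
    # As the round-trips above show, the serialised block is simply
    #     'gcb1z\n' + '<compressed-length>\n' + '<length>\n' + zlib bytes
    # so a block can be assembled by hand, mirroring test_from_bytes:
    #     z = zlib.compress('a tiny bit of content\n')
    #     raw = 'gcb1z\n%d\n%d\n%s' % (len(z), 22, z)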

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
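
    # _ensure_content(n) decompresses lazily: it inflates just enough of the
    # zlib stream to make at least n bytes of plain content available, keeps
    # the decompressor around for later calls, and drops it (setting
    # _z_content_decompressor back to None) once everything is decompressed.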

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
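
    # _dump() summarises the block one tuple per record, as asserted above:
    # ('f', length) for a fulltext, ('d', delta_length, text_length,
    # instructions) for a delta, where the instructions are ('c', offset,
    # length) for copies and ('i', length, text) for inserts (the inserted
    # text itself is elided here, hence the empty string).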


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
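
    # On the storage kinds seen above: the first record served out of each
    # group carries the whole compressed block ('groupcompress-block'), and
    # the remaining records from the same group are lightweight references to
    # it ('groupcompress-block-ref'), which is what makes reusing blocks on
    # fetch cheap.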

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)
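
    # The wire format checked here (and unpacked in test__wire_bytes below)
    # is: 'groupcompress-block\n<z_header_len>\n<header_len>\n<block_len>\n'
    # followed by the zlib-compressed header and the serialised block; the
    # header lists, per key: the key, a parents line (blank when there are
    # none), and the start and end offsets of its record within the block.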

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)
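
    # When only some keys are wanted, _check_rebuild_block() shrinks the
    # block: requesting a prefix of the content triggers a cheap 'strip' of
    # the unused tail (key1 below), while requesting content preceded by
    # unused bytes triggers a full 'rebuild' (key4); either way the manager
    # ends up with a new, smaller block.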

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))