# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    errors,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
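
# multiply_tests() runs every test in TestAllGroupCompressors once per
# scenario: always against PythonGroupCompressor, and additionally against
# PyrexGroupCompressor when the compiled extension is available.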


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
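
    # For example, _chunks_to_repr_lines(['a\nb\xff']) joins the chunks,
    # splits on newlines and reprs each line, producing "'a'" and "'b\\xff'"
    # joined by a newline, so non-ascii bytes show up readably in the
    # assertEqualDiff output.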


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
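
    # The expected_lines byte sequences in these tests follow the
    # groupcompress record layout: a fulltext record is 'f' followed by its
    # encoded length and the raw bytes, and a delta record is 'd' followed by
    # the delta's length and a series of instructions.  A copy instruction
    # such as '\x91\x0a\x2c' has the high bit of the command byte set, with
    # the following bytes giving the offset (0x0a) and length (0x2c) to copy
    # from earlier content; an insert instruction is a count byte below 0x80
    # (here '\x0a', ten) followed by that many literal bytes ('different\n').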

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object.
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
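
    # _dump() renders the block for inspection: ('f', length) entries are
    # fulltext records, and ('d', delta_length, text_length, [...]) entries
    # are delta records whose instruction list contains ('c', offset, length)
    # copies and ('i', length, text) inserts, as asserted above.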


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])

        keys = [record.key for record in vf.get_record_stream(
                [('a',), ('b',), ('c',), ('d',)],
                'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                [('b',), ('a',), ('d',), ('c',)],
                'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
                [('a',), ('b',), ('c',), ('d',)],
                'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                [('b',), ('a',), ('d',), ('c',)],
                'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization of the original blocks.
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class StubGCVF(object):
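    """A minimal stand-in for a GroupCompressVersionedFiles object.

    It only provides what _BatchingBlockFetcher uses in these tests: a
    _group_cache dict and a _get_blocks method that replays whatever canned
    (read_memo, block) pairs it was constructed with.
    """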

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw compressed block to fetch.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len of uncompressed header
                         '%d\n' # compressed block len
                         '%s' # compressed header
                         '%s' # compressed block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
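
    # The wire form checked above is: a 'groupcompress-block\n' marker, three
    # decimal length lines (compressed header length, uncompressed header
    # length, block length), then the zlib-compressed header followed by the
    # block bytes themselves.  The header lists, for each selected key, the
    # key, a parents line, and the start/end offsets of its record within the
    # uncompressed block.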

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However, if we add key4, then we have enough, as we only require 75%
        # utilization
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())