# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from .. import (
    btree_index,
    config,
    errors,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from ..osutils import sha_string
from .test__groupcompress import compiled_groupcompress_feature
from .scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
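# load_tests_apply_scenarios multiplies each test in a class that defines a
# 'scenarios' attribute, so TestAllGroupCompressors below is run once per
# available compressor implementation.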


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
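        # The group opens with a fulltext record: kind byte 'f', a length
        # byte ('\x0f' == 15 == len('strange\ncommon\n')), then the raw
        # content bytes.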
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
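
    # A reading of the delta bytes asserted above, using only values the test
    # itself checks: 'd' marks a delta record and '\x0f' its length (1 target
    # length byte + 3 copy bytes + 1 insert header + 10 literal bytes);
    # '\x36' (54) is the uncompressed target length; a command byte with the
    # high bit set ('\x91') is a copy whose offset and length bytes follow,
    # while a command byte below 0x80 ('\x0a') inserts that many literal
    # bytes.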

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
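
    # Sanity check on the delta length byte above: 'd\x0c' declares 12 bytes
    # of delta content, which is 1 (target length '\x5f') + 5 ('\x04new\n')
    # + 3 + 3 (the two copy commands).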


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        total_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            total_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                            ]),
                         ], block._dump())
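
    # _dump() summarises the block's records: ('f', length) is a fulltext,
    # ('d', delta_length, content_length, [...]) is a delta whose instructions
    # are ('c', offset, length) copies and ('i', length, text) inserts (the
    # insert text is elided here).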


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # insufficient content to justify keeping the old blocks.
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)


class StubGCVF(object):

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
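

# StubGCVF is a minimal stand-in for GroupCompressVersionedFiles: just enough
# state (_group_cache) and behaviour (_get_blocks) for the
# _BatchingBlockFetcher tests below.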


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s' # zheader
                         '%s' # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
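
    # The wire format exercised above: a storage-kind line, three decimal
    # length lines (compressed header, uncompressed header, block), the
    # zlib-compressed header (key, parents, start and end offsets per entry),
    # then the block bytes themselves.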

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if not issubclass(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))