# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Tests for group compression."""

import zlib

from .. import (
    btree_index,
    config,
    groupcompress,
    index as _mod_index,
    knit,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from ..osutils import sha_string
from .test__groupcompress import compiled_groupcompress_feature
from .scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
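
# Note: load_tests_apply_scenarios multiplies each test in a class that
# defines a 'scenarios' attribute, so TestAllGroupCompressors below runs
# once per compressor implementation available on this system.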


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
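
    # A note on the record checked above: 'f' marks a fulltext entry and the
    # following byte(s) give its length as a variable-width integer;
    # \x0f == 15 == len('strange\ncommon\n'), so the raw text fills the
    # rest of the record.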

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
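
    # In this delta encoding (similar to git's binary delta format), a
    # command byte with the high bit set is a copy: its low bits say how
    # many offset and length bytes follow, so \x91\x0a\x2c copies 0x2c bytes
    # starting at offset 0x0a of the group. A command byte below \x80
    # inserts that many literal bytes, as with \x0a followed by
    # 'different\n'.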

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        total_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            total_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
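
    # The serialized block layout exercised above: a magic line naming the
    # format and compressor ('gcb1z' == group-compress block v1, zlib), one
    # decimal line each for the compressed and uncompressed lengths, then
    # the raw zlib stream with no further framing.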

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
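
    # _ensure_content() relies on zlib.decompressobj(), which can stop after
    # producing roughly the requested number of output bytes and resume
    # later; a plain zlib.decompress() would have to inflate the whole
    # stream up front.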

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
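
    # _dump() summarizes each record in the block: ('f', length) is a
    # fulltext, and ('d', delta_length, text_length, [...]) is a delta whose
    # instructions are ('c', offset, length) copies and ('i', length, text)
    # inserts.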


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
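
    # The first record out of a reused block is sent as storage kind
    # 'groupcompress-block', carrying the compressed block itself; later
    # records from the same block arrive as 'groupcompress-block-ref' and
    # just point back into the manager that was already transmitted.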

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        block = None
        # All of the records should be recombined into a single block
        num_records = 0
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        source.writer.end()
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \\('b',\\) \\('42 32 0 8', \\(\\(\\),\\)\\) \\('74 32"
                              " 0 8', \\(\\(\\('a',\\),\\),\\)\\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
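
    # bzr.groupcompress.max_bytes_to_index bounds how much of each source
    # text the delta index will hash; only the C (Pyrex) compressor exposes
    # the knob on its _delta_index, which is why the assertions above are
    # guarded by an isinstance() check.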


class StubGCVF(object):

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)
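
    # The lazy-manager wire format is thus: the 'groupcompress-block' label,
    # three decimal length lines (compressed header, uncompressed header,
    # block), the zlib-compressed header describing each wanted record, and
    # finally the block bytes themselves.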

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
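
    # A 'strip' just truncates unused bytes from the tail of the block,
    # while a 'rebuild' recompresses only the requested texts;
    # _check_rebuild_block() picks between them based on where the wanted
    # content sits within the block.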

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        # groupcompress.GroupCompressor is bound to one of the two compressor
        # classes, so compare class identity rather than using isinstance()
        # (a class is never an instance of another class).
        if (groupcompress.GroupCompressor
                is not groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # usage
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))