# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
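
# Note on the parameterization: tests.multiply_tests() clones each
# TestAllGroupCompressors test once per scenario above, attaching that
# scenario's 'compressor' attribute to the copy, so every assertion runs
# against the pure-Python implementation and, when the compiled extension is
# available, against the Pyrex/C one as well.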


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
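
    # The expected bytes above rely on implicit string concatenation: 'f'
    # marks a fulltext record and '\x0f' (15) is the length of
    # 'strange\ncommon\n', so sum(map(len, expected_lines)) over the string
    # equals the record length reported as end_point.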

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
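
    # A sketch of the delta encoding verified above, inferred from the inline
    # comments rather than from groupcompress internals: a record is 'f'
    # (fulltext) or 'd' (delta) plus a length byte. The delta body opens with
    # the target text length ('\x36' == 54, the three target lines); a byte
    # with the high bit set ('\x91') is a copy instruction whose low bits
    # select the offset/length bytes that follow (offset 0x0a, length
    # 0x2c == 44); a byte below 0x80 is an insert of that many literal bytes
    # ('\x0a' then 'different\n'). Body size: 1 + 3 + 11 == 15 == 0x0f,
    # matching the 'd\x0f' header.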

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f',
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
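
    # Length check for the delta above: the target text is 95 == 0x5f bytes
    # ('new\n...extra text\n' is 52, 'different\n...some more\n' is 43), and
    # the delta body ('\x5f' + '\x03new' + two 3-byte copy instructions) is
    # 1 + 4 + 3 + 3 == 11 == 0x0b, matching the 'd\x0b' header.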


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # source and target length
            '\x5f',
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        all_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            all_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
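
    # Layout exercised here: a 'gcb1z\n' magic line (v1, zlib-compressed),
    # two decimal ASCII length lines (compressed, then uncompressed), and
    # then the raw zlib stream. Everything after the third newline is exactly
    # what zlib.decompress sees in test_to_bytes below.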

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
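
    # As both partial-decompression tests assert: _ensure_content(n) inflates
    # only enough of the zlib stream to reach n bytes of plaintext, while
    # calling it with the full length (or, when the length is unknown, with
    # no argument) finishes the stream and drops the internal decompressor
    # object.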

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
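
    # Block re-use is observed through record._manager._block._z_content:
    # records from the same group must share the very same compressed-bytes
    # object (assertIs), and copying between versioned files must carry the
    # original z_content across unchanged.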

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
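
# StubGCVF stands in for a GroupCompressVersionedFiles instance: the batching
# fetcher only touches its _group_cache dict (blocks already fetched) and its
# _get_blocks() method (here simply replaying a canned list), which lets the
# whitebox tests below run without any real pack storage.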


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw compressed block
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
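
    # The batching protocol exercised above: add_key() records the key and
    # any uncached read_memo, returning the cumulative byte size (so a caller
    # can decide when a batch is large enough), while yield_factories() emits
    # record factories and, with full_flush=True, drains the final batch.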


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len of uncompressed header
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
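
    # Wire format recap, as parsed above: four newline-terminated fields
    # ('groupcompress-block', compressed header length, uncompressed header
    # length, block length), then the zlib-compressed header (per entry: key,
    # a parents line, start and end offsets) followed by the block bytes
    # verbatim.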

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))