# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    errors,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
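

# The raw chunk streams checked above start each record with a kind byte
# ('f' for fulltext, 'd' for delta) followed by the content length encoded
# as a base-128 varint -- the '\x0f' in test_one_nosha_delta is simply 15,
# the length of 'strange\ncommon\n'. A minimal sketch of that varint
# decoding, mirroring what we understand groupcompress.decode_base128_int
# to do (this helper name is ours, for illustration only):
def _decode_base128_int(data, pos=0):
    """Return (value, new_pos) for a little-endian base-128 varint."""
    value = 0
    shift = 0
    while True:
        byte = ord(data[pos])
        pos += 1
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            # High bit clear: this was the final byte of the varint.
            return value, pos
        shift += 7
# e.g. _decode_base128_int('\x0fstrange\ncommon\n') == (15, 1)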


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
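
    # A note on the 1.9 above: ratio() is read by these tests as total input
    # bytes over bytes written into the group, so three heavily overlapping
    # texts compressing to roughly half their combined size is the expected
    # outcome.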

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
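

# The '\x91...' byte strings in the delta expectations above are copy
# instructions. A hedged sketch of how they decode, inferred from the
# comments in these tests (the real logic lives in the groupcompress delta
# code; this helper is ours): the high bit of the command byte marks a
# copy, bits 0-3 select which little-endian offset bytes follow, bits 4-6
# which length bytes. A command byte below 0x80 is an insert of that many
# literal bytes.
def _decode_copy_instruction(delta, pos):
    """Return (offset, length, new_pos) for the copy starting at delta[pos]."""
    cmd = ord(delta[pos])
    pos += 1
    offset = 0
    length = 0
    for i in range(4):  # up to four offset bytes
        if cmd & (1 << i):
            offset |= ord(delta[pos]) << (i * 8)
            pos += 1
    for i in range(3):  # up to three length bytes
        if cmd & (1 << (4 + i)):
            length |= ord(delta[pos]) << (i * 8)
            pos += 1
    return offset, length, pos
# e.g. _decode_copy_instruction('\x91\x0a\x2c', 0) == (0x0a, 0x2c, 3)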


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
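

# Note that the Python and Pyrex compressors emit slightly different but
# equally valid deltas for the same texts: above, the Pyrex version inserts
# 'new' and copies from offset 0x09, while the Python version inserts
# 'new\n' and copies from offset 0x0a. This is why the two classes carry
# separate byte-level expectations.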


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
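
    # A note on the two partial-decompression tests above: _ensure_content()
    # can stop early because zlib.decompressobj() supports bounded output.
    # A minimal stdlib-only sketch of the same idea:
    #   d = zlib.decompressobj()
    #   some = d.decompress(z_content, 100)          # at most 100 bytes out
    #   more = d.decompress(d.unconsumed_tail, 100)  # the next chunk
    # Once the full content has been produced the block drops the
    # decompressor, which is why _z_content_decompressor ends up None.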

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
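

# The tests above pin down the on-disk block layout:
#   'gcb1z\n' <compressed length> '\n' <uncompressed length> '\n' <zlib data>
# A hedged standalone sketch of pulling that apart (the canonical parser is
# GroupCompressBlock.from_bytes; this helper is ours, for illustration):
def _parse_gcb_header(data):
    """Split a gcb1z block into (z_content_length, content_length, z_content)."""
    header, z_len, length, z_content = data.split('\n', 3)
    if header != 'gcb1z':
        raise ValueError('not a zlib group compress block: %r' % (header,))
    return int(z_len), int(length), z_content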


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
            [('a',), ('b',), ('c',), ('d',)],
            'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)],
            'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
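
    # As the assertions above show, the first record served out of a group
    # carries the whole compressed block ('groupcompress-block'), and the
    # remaining records from the same group are lightweight references to it
    # ('groupcompress-block-ref').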

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s'   # zheader
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
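

# For reference, the wire format exercised by the _wire_bytes tests above
# is, as far as these tests pin down:
#   'groupcompress-block\n' <z_header_len>'\n' <header_len>'\n' <block_len>'\n'
#   <zlib(header)> <block bytes>
# where the header holds 'key\n parents\n start\n end\n' per requested
# factory. A hedged sketch of splitting it back apart (this helper name is
# ours, for illustration only):
def _split_wire_bytes(wire_bytes):
    """Return (z_header, block_bytes) from a manager's _wire_bytes() output."""
    kind, z_len, h_len, b_len, rest = wire_bytes.split('\n', 4)
    if kind != 'groupcompress-block':
        raise ValueError('unknown storage kind: %r' % (kind,))
    z_header, block_bytes = rest[:int(z_len)], rest[int(z_len):]
    # Sanity-check the declared lengths against the payload.
    assert len(zlib.decompress(z_header)) == int(h_len)
    assert len(block_bytes) == int(b_len)
    return z_header, block_bytes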