# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
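        # A note on the expected bytes below (an inference from this test,
        # not a documented spec): 'f' flags a fulltext record, and '\x0f'
        # is the content length -- 15 == len('strange\ncommon\n') --
        # encoded as a single base-128 length byte, followed by the raw
        # bytes themselves.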
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
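        # A sketch of the delta encoding asserted below (inferred from the
        # expected bytes, not from a separate spec): a delta record is 'd'
        # plus a base-128 delta length; inside the delta, a command byte
        # with the high bit set starts a copy (0x91 => one offset byte and
        # one length byte follow), while a command byte below 0x80 inserts
        # that many literal bytes. The lengths cross-check by hand: the
        # delta body below is 1 + 3 + 11 == 0x0f bytes, and the copy takes
        # 0x2c (44) bytes starting at stream offset 0x0a.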
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f',
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31', # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b', # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # source and target length
            '\x5f',
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30', # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b', # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        total_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            total_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
        old_bytes = bytes

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; to get a mix, we interleave sha hashes
        # with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
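
        # For reference, the partial decompression exercised above boils
        # down to plain zlib streaming; a minimal sketch (illustrative
        # names, not the API under test):
        #   d = zlib.decompressobj()
        #   data = d.decompress(z_content, 100)           # ~100 bytes out
        #   data += d.decompress(d.unconsumed_tail, 100)  # and a bit more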

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
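
        # A reading of the _dump() tuples asserted above (inferred, not
        # documented behaviour): 'f' is a fulltext record with its length,
        # 'd' a delta with its delta length and expanded length, 'c' a copy
        # instruction (offset, length), and 'i' an insert of the given
        # number of literal bytes.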


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be
        # disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be
        # disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len of uncompressed header
                         '%d\n' # compressed block len
                         '%s' # zheader
                         '%s' # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
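
        # To summarize the layout this test just picked apart: the wire
        # form is 'groupcompress-block\n', three length lines (compressed
        # header, uncompressed header, block), the zlib-compressed header
        # of (key, parents, start, end) entries, then the block bytes
        # themselves.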

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))