418
418
# And the decompressor is finalized
419
419
self.assertIs(None, block._z_content_decompressor)
421
def test_partial_decomp_no_known_length(self):
421
def test__ensure_all_content(self):
422
422
content_chunks = []
423
# We need a sufficient amount of data so that zlib.decompress has
424
# partial decompression to work with. Most auto-generated data
425
# compresses a bit too well, we want a combination, so we combine a sha
426
# hash with compressible data.
423
427
for i in xrange(2048):
424
428
next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
425
429
content_chunks.append(next_content)
433
437
block._z_content = z_content
434
438
block._z_content_length = len(z_content)
435
439
block._compressor_name = 'zlib'
436
block._content_length = None # Don't tell the decompressed length
440
block._content_length = 158634
437
441
self.assertIs(None, block._content)
438
block._ensure_content(100)
439
self.assertIsNot(None, block._content)
440
# We have decompressed at least 100 bytes
441
self.assertTrue(len(block._content) >= 100)
442
# We have not decompressed the whole content
443
self.assertTrue(len(block._content) < 158634)
444
self.assertEqualDiff(content[:len(block._content)], block._content)
445
# ensuring content that we already have shouldn't cause any more data
447
cur_len = len(block._content)
448
block._ensure_content(cur_len - 10)
449
self.assertEqual(cur_len, len(block._content))
450
# Now we want a bit more content
452
block._ensure_content(cur_len)
453
self.assertTrue(len(block._content) >= cur_len)
454
self.assertTrue(len(block._content) < 158634)
455
self.assertEqualDiff(content[:len(block._content)], block._content)
456
# And now lets finish
457
block._ensure_content()
442
# The first _ensure_content got all of the required data
443
block._ensure_content(158634)
458
444
self.assertEqualDiff(content, block._content)
459
# And the decompressor is finalized
445
# And we should have released the _z_content_decompressor since it was
460
447
self.assertIs(None, block._z_content_decompressor)
462
449
def test__dump(self):
472
459
], block._dump())
475
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
462
class TestCaseWithGroupCompressVersionedFiles(
463
tests.TestCaseWithMemoryTransport):
477
465
def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
478
466
dir='.', inconsistency_fatal=True):
538
526
'as-requested', False)]
539
527
self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
541
def test_insert_record_stream_re_uses_blocks(self):
529
def test_insert_record_stream_reuses_blocks(self):
542
530
vf = self.make_test_vf(True, dir='source')
543
531
def grouped_stream(revision_ids, first_parents=()):
544
532
parents = first_parents
582
570
vf2 = self.make_test_vf(True, dir='target')
583
571
# ordering in 'groupcompress' order, should actually swap the groups in
584
572
# the target vf, but the groups themselves should not be disturbed.
585
vf2.insert_record_stream(vf.get_record_stream(
586
[(r,) for r in 'abcdefgh'], 'groupcompress', False))
573
def small_size_stream():
574
for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
'groupcompress', False):
576
record._manager._full_enough_block_size = \
577
record._manager._block._content_length
580
vf2.insert_record_stream(small_size_stream())
587
581
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
588
582
'groupcompress', False)
594
588
record._manager._block._z_content)
595
589
self.assertEqual(8, num_records)
591
def test_insert_record_stream_packs_on_the_fly(self):
592
vf = self.make_test_vf(True, dir='source')
593
def grouped_stream(revision_ids, first_parents=()):
594
parents = first_parents
595
for revision_id in revision_ids:
597
record = versionedfile.FulltextContentFactory(
599
'some content that is\n'
600
'identical except for\n'
601
'revision_id:%s\n' % (revision_id,))
605
vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
607
vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
first_parents=(('d',),)))
609
# Now copy the blocks into another vf, and see that the
610
# insert_record_stream rebuilt a new block on-the-fly because of
612
vf2 = self.make_test_vf(True, dir='target')
613
vf2.insert_record_stream(vf.get_record_stream(
614
[(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
'groupcompress', False)
619
# All of the records should be recombined into a single block
621
for record in stream:
624
block = record._manager._block
626
self.assertIs(block, record._manager._block)
627
self.assertEqual(8, num_records)
597
629
def test__insert_record_stream_no_reuse_block(self):
598
630
vf = self.make_test_vf(True, dir='source')
599
631
def grouped_stream(revision_ids, first_parents=()):
701
733
" \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
702
734
" 0 8', \(\(\('a',\),\),\)\)")
736
def test_clear_cache(self):
737
vf = self.make_source_with_b(True, 'source')
739
for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
742
self.assertTrue(len(vf._group_cache) > 0)
744
self.assertEqual(0, len(vf._group_cache))
705
748
class StubGCVF(object):
706
749
def __init__(self, canned_get_blocks=None):
813
856
('key1',): "this is a text\n"
814
"with a reasonable amount of compressible bytes\n",
857
"with a reasonable amount of compressible bytes\n"
858
"which can be shared between various other texts\n",
815
859
('key2',): "another text\n"
816
"with a reasonable amount of compressible bytes\n",
860
"with a reasonable amount of compressible bytes\n"
861
"which can be shared between various other texts\n",
817
862
('key3',): "yet another text which won't be extracted\n"
818
"with a reasonable amount of compressible bytes\n",
863
"with a reasonable amount of compressible bytes\n"
864
"which can be shared between various other texts\n",
819
865
('key4',): "this will be extracted\n"
820
866
"but references most of its bytes from\n"
821
867
"yet another text which won't be extracted\n"
822
"with a reasonable amount of compressible bytes\n",
868
"with a reasonable amount of compressible bytes\n"
869
"which can be shared between various other texts\n",
824
871
def make_block(self, key_to_text):
825
872
"""Create a GroupCompressBlock, filling it with the given texts."""
837
884
start, end = locations[key]
838
885
manager.add_factory(key, (), start, end)
887
def make_block_and_full_manager(self, texts):
    """Create a block from texts plus a manager that requests every key.

    :param texts: A mapping of key => text; it is handed to make_block and
        its keys are registered with the manager in sorted order.
    :return: (block, manager)
    """
    key_locations, gc_block = self.make_block(texts)
    full_manager = groupcompress._LazyGroupContentManager(gc_block)
    ordered_keys = sorted(texts)
    for text_key in ordered_keys:
        self.add_key_to_manager(text_key, key_locations, gc_block,
                                full_manager)
    return gc_block, full_manager
840
894
def test_get_fulltexts(self):
841
895
locations, block = self.make_block(self._texts)
842
896
manager = groupcompress._LazyGroupContentManager(block)
893
947
header_len = int(header_len)
894
948
block_len = int(block_len)
895
949
self.assertEqual('groupcompress-block', storage_kind)
896
self.assertEqual(33, z_header_len)
897
self.assertEqual(25, header_len)
950
self.assertEqual(34, z_header_len)
951
self.assertEqual(26, header_len)
898
952
self.assertEqual(len(block_bytes), block_len)
899
953
z_header = rest[:z_header_len]
900
954
header = zlib.decompress(z_header)
934
988
self.assertEqual([('key1',), ('key4',)], result_order)
936
990
def test__check_rebuild_no_changes(self):
    """_check_rebuild_block keeps the same block when all keys are used."""
    # make_block_and_full_manager requests every key in self._texts, which
    # ensures that we won't rebuild. (The hand-written per-key setup that
    # used to precede this call was overwritten by it and has been dropped.)
    block, manager = self.make_block_and_full_manager(self._texts)
    manager._check_rebuild_block()
    # The manager must still reference the very same block object.
    self.assertIs(block, manager._block)
971
1019
self.assertEqual(('key4',), record.key)
972
1020
self.assertEqual(self._texts[record.key],
973
1021
record.get_bytes_as('fulltext'))
1023
def test_check_is_well_utilized_all_keys(self):
    """check_is_well_utilized tracks _full_enough_block_size when every key
    of the block is requested.
    """
    gc_block, mgr = self.make_block_and_full_manager(self._texts)
    # With the manager's initial thresholds the block is not well utilized
    self.assertFalse(mgr.check_is_well_utilized())
    content_length = gc_block._content_length
    # Though we can fake it by changing the recommended minimum size
    mgr._full_enough_block_size = content_length
    self.assertTrue(mgr.check_is_well_utilized())
    # Setting it just above causes it to fail
    mgr._full_enough_block_size = content_length + 1
    self.assertFalse(mgr.check_is_well_utilized())
    # Setting the mixed-block size doesn't do anything, because the content
    # is considered to not be 'mixed'
    mgr._full_enough_mixed_block_size = content_length
    self.assertFalse(mgr.check_is_well_utilized())
1037
def test_check_is_well_utilized_mixed_keys(self):
1043
texts[f1k1] = self._texts[('key1',)]
1044
texts[f1k2] = self._texts[('key2',)]
1045
texts[f2k1] = self._texts[('key3',)]
1046
texts[f2k2] = self._texts[('key4',)]
1047
block, manager = self.make_block_and_full_manager(texts)
1048
self.assertFalse(manager.check_is_well_utilized())
1049
manager._full_enough_block_size = block._content_length
1050
self.assertTrue(manager.check_is_well_utilized())
1051
manager._full_enough_block_size = block._content_length + 1
1052
self.assertFalse(manager.check_is_well_utilized())
1053
manager._full_enough_mixed_block_size = block._content_length
1054
self.assertTrue(manager.check_is_well_utilized())
1056
def test_check_is_well_utilized_partial_use(self):
    """check_is_well_utilized depends on how many of the block's keys are
    requested, not only on the block size threshold.
    """
    locations, block = self.make_block(self._texts)
    manager = groupcompress._LazyGroupContentManager(block)
    # Make the size threshold pass, so only the requested keys matter below
    manager._full_enough_block_size = block._content_length
    self.add_key_to_manager(('key1',), locations, block, manager)
    self.add_key_to_manager(('key2',), locations, block, manager)
    # Just using the content from key1 and key2 is not enough for the block
    # to be considered well utilized
    self.assertFalse(manager.check_is_well_utilized())
    # However if we add key4, then we have enough, as we only require 75%
    # NOTE(review): the original note was cut off after "75%" and named
    # key3 although the code adds key4; the comment now follows the code.
    self.add_key_to_manager(('key4',), locations, block, manager)
    self.assertTrue(manager.check_is_well_utilized())