669
607
                self.assertIs(block, record._manager._block)
 
671
 
    def test_add_missing_noncompression_parent_unvalidated_index(self):
 
672
 
        unvalidated = self.make_g_index_missing_parent()
 
673
 
        combined = _mod_index.CombinedGraphIndex([unvalidated])
 
674
 
        index = groupcompress._GCGraphIndex(combined,
 
675
 
            is_locked=lambda: True, parents=True,
 
676
 
            track_external_parent_refs=True)
 
677
 
        index.scan_unvalidated_index(unvalidated)
 
679
 
            frozenset([('missing-parent',)]), index.get_missing_parents())
 
681
 
    def test_track_external_parent_refs(self):
 
682
 
        g_index = self.make_g_index('empty', 1, [])
 
683
 
        mod_index = btree_index.BTreeBuilder(1, 1)
 
684
 
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
 
685
 
        index = groupcompress._GCGraphIndex(combined,
 
686
 
            is_locked=lambda: True, parents=True,
 
687
 
            add_callback=mod_index.add_nodes,
 
688
 
            track_external_parent_refs=True)
 
690
 
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
 
692
 
            frozenset([('parent-1',), ('parent-2',)]),
 
693
 
            index.get_missing_parents())
 
695
 
    def make_source_with_b(self, a_parent, path):
 
696
 
        source = self.make_test_vf(True, dir=path)
 
697
 
        source.add_lines(('a',), (), ['lines\n'])
 
699
 
            b_parents = (('a',),)
 
702
 
        source.add_lines(('b',), b_parents, ['lines\n'])
 
705
 
    def do_inconsistent_inserts(self, inconsistency_fatal):
 
706
 
        target = self.make_test_vf(True, dir='target',
 
707
 
                                   inconsistency_fatal=inconsistency_fatal)
 
709
 
            source = self.make_source_with_b(x==1, 'source%s' % x)
 
710
 
            target.insert_record_stream(source.get_record_stream(
 
711
 
                [('b',)], 'unordered', False))
 
713
 
    def test_inconsistent_redundant_inserts_warn(self):
 
714
 
        """Should not insert a record that is already present."""
 
716
 
        def warning(template, args):
 
717
 
            warnings.append(template % args)
 
718
 
        _trace_warning = trace.warning
 
719
 
        trace.warning = warning
 
721
 
            self.do_inconsistent_inserts(inconsistency_fatal=False)
 
723
 
            trace.warning = _trace_warning
 
724
 
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
 
725
 
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
 
728
 
    def test_inconsistent_redundant_inserts_raises(self):
 
729
 
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
 
730
 
                              inconsistency_fatal=True)
 
731
 
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
 
733
 
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
 
734
 
                              " 0 8', \(\(\('a',\),\),\)\)")
 
736
 
    def test_clear_cache(self):
 
737
 
        vf = self.make_source_with_b(True, 'source')
 
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
 
742
 
        self.assertTrue(len(vf._group_cache) > 0)
 
744
 
        self.assertEqual(0, len(vf._group_cache))
 
748
 
class StubGCVF(object):
    """Minimal stand-in for a GroupCompressVersionedFiles.

    Exposes only what _BatchingBlockFetcher touches: an (initially empty)
    group cache, and a ``_get_blocks`` that replays a canned sequence.
    """

    def __init__(self, canned_get_blocks=None):
        # read_memo -> block cache; starts empty so nothing counts as cached.
        self._group_cache = {}
        # Pre-recorded (read_memo, block) pairs to hand back from _get_blocks.
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        # The stub ignores read_memos and simply yields the canned pairs.
        return iter(self._canned_get_blocks)
 
756
 
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
 
757
 
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
 
759
 
    def test_add_key_new_read_memo(self):
 
760
 
        """Adding a key with an uncached read_memo new to this batch adds that
 
761
 
        read_memo to the list of memos to fetch.
 
763
 
        # locations are: index_memo, ignored, parents, ignored
 
764
 
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
 
765
 
        # and (idx, offset, size) is known as the 'read_memo', identifying the
 
767
 
        read_memo = ('fake index', 100, 50)
 
769
 
            ('key',): (read_memo + (None, None), None, None, None)}
 
770
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
 
771
 
        total_size = batcher.add_key(('key',))
 
772
 
        self.assertEqual(50, total_size)
 
773
 
        self.assertEqual([('key',)], batcher.keys)
 
774
 
        self.assertEqual([read_memo], batcher.memos_to_get)
 
776
 
    def test_add_key_duplicate_read_memo(self):
 
777
 
        """read_memos that occur multiple times in a batch will only be fetched
 
780
 
        read_memo = ('fake index', 100, 50)
 
781
 
        # Two keys, both sharing the same read memo (but different overall
 
784
 
            ('key1',): (read_memo + (0, 1), None, None, None),
 
785
 
            ('key2',): (read_memo + (1, 2), None, None, None)}
 
786
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
 
787
 
        total_size = batcher.add_key(('key1',))
 
788
 
        total_size = batcher.add_key(('key2',))
 
789
 
        self.assertEqual(50, total_size)
 
790
 
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
 
791
 
        self.assertEqual([read_memo], batcher.memos_to_get)
 
793
 
    def test_add_key_cached_read_memo(self):
 
794
 
        """Adding a key with a cached read_memo will not cause that read_memo
 
795
 
        to be added to the list to fetch.
 
797
 
        read_memo = ('fake index', 100, 50)
 
799
 
        gcvf._group_cache[read_memo] = 'fake block'
 
801
 
            ('key',): (read_memo + (None, None), None, None, None)}
 
802
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
803
 
        total_size = batcher.add_key(('key',))
 
804
 
        self.assertEqual(0, total_size)
 
805
 
        self.assertEqual([('key',)], batcher.keys)
 
806
 
        self.assertEqual([], batcher.memos_to_get)
 
808
 
    def test_yield_factories_empty(self):
 
809
 
        """An empty batch yields no factories."""
 
810
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
 
811
 
        self.assertEqual([], list(batcher.yield_factories()))
 
813
 
    def test_yield_factories_calls_get_blocks(self):
 
814
 
        """Uncached memos are retrieved via get_blocks."""
 
815
 
        read_memo1 = ('fake index', 100, 50)
 
816
 
        read_memo2 = ('fake index', 150, 40)
 
819
 
                (read_memo1, groupcompress.GroupCompressBlock()),
 
820
 
                (read_memo2, groupcompress.GroupCompressBlock())])
 
822
 
            ('key1',): (read_memo1 + (None, None), None, None, None),
 
823
 
            ('key2',): (read_memo2 + (None, None), None, None, None)}
 
824
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
825
 
        batcher.add_key(('key1',))
 
826
 
        batcher.add_key(('key2',))
 
827
 
        factories = list(batcher.yield_factories(full_flush=True))
 
828
 
        self.assertLength(2, factories)
 
829
 
        keys = [f.key for f in factories]
 
830
 
        kinds = [f.storage_kind for f in factories]
 
831
 
        self.assertEqual([('key1',), ('key2',)], keys)
 
832
 
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
 
834
 
    def test_yield_factories_flushing(self):
 
835
 
        """yield_factories holds back on yielding results from the final block
 
836
 
        unless passed full_flush=True.
 
838
 
        fake_block = groupcompress.GroupCompressBlock()
 
839
 
        read_memo = ('fake index', 100, 50)
 
841
 
        gcvf._group_cache[read_memo] = fake_block
 
843
 
            ('key',): (read_memo + (None, None), None, None, None)}
 
844
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
845
 
        batcher.add_key(('key',))
 
846
 
        self.assertEqual([], list(batcher.yield_factories()))
 
847
 
        factories = list(batcher.yield_factories(full_flush=True))
 
848
 
        self.assertLength(1, factories)
 
849
 
        self.assertEqual(('key',), factories[0].key)
 
850
 
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
 
853
610
class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
856
613
        ('key1',): "this is a text\n"
 
857
 
                   "with a reasonable amount of compressible bytes\n"
 
858
 
                   "which can be shared between various other texts\n",
 
 
614
                   "with a reasonable amount of compressible bytes\n",
 
859
615
        ('key2',): "another text\n"
 
860
 
                   "with a reasonable amount of compressible bytes\n"
 
861
 
                   "which can be shared between various other texts\n",
 
 
616
                   "with a reasonable amount of compressible bytes\n",
 
862
617
        ('key3',): "yet another text which won't be extracted\n"
 
863
 
                   "with a reasonable amount of compressible bytes\n"
 
864
 
                   "which can be shared between various other texts\n",
 
 
618
                   "with a reasonable amount of compressible bytes\n",
 
865
619
        ('key4',): "this will be extracted\n"
 
866
620
                   "but references most of its bytes from\n"
 
867
621
                   "yet another text which won't be extracted\n"
 
868
 
                   "with a reasonable amount of compressible bytes\n"
 
869
 
                   "which can be shared between various other texts\n",
 
 
622
                   "with a reasonable amount of compressible bytes\n",
 
871
624
    def make_block(self, key_to_text):
 
872
625
        """Create a GroupCompressBlock, filling it with the given texts."""
 
 
1019
771
            self.assertEqual(('key4',), record.key)
 
1020
772
            self.assertEqual(self._texts[record.key],
 
1021
773
                             record.get_bytes_as('fulltext'))
 
1023
 
    def test_check_is_well_utilized_all_keys(self):
 
1024
 
        block, manager = self.make_block_and_full_manager(self._texts)
 
1025
 
        self.assertFalse(manager.check_is_well_utilized())
 
1026
 
        # Though we can fake it by changing the recommended minimum size
 
1027
 
        manager._full_enough_block_size = block._content_length
 
1028
 
        self.assertTrue(manager.check_is_well_utilized())
 
1029
 
        # Setting it just above causes it to fail
 
1030
 
        manager._full_enough_block_size = block._content_length + 1
 
1031
 
        self.assertFalse(manager.check_is_well_utilized())
 
1032
 
        # Setting the mixed-block size doesn't do anything, because the content
 
1033
 
        # is considered to not be 'mixed'
 
1034
 
        manager._full_enough_mixed_block_size = block._content_length
 
1035
 
        self.assertFalse(manager.check_is_well_utilized())
 
1037
 
    def test_check_is_well_utilized_mixed_keys(self):
 
1043
 
        texts[f1k1] = self._texts[('key1',)]
 
1044
 
        texts[f1k2] = self._texts[('key2',)]
 
1045
 
        texts[f2k1] = self._texts[('key3',)]
 
1046
 
        texts[f2k2] = self._texts[('key4',)]
 
1047
 
        block, manager = self.make_block_and_full_manager(texts)
 
1048
 
        self.assertFalse(manager.check_is_well_utilized())
 
1049
 
        manager._full_enough_block_size = block._content_length
 
1050
 
        self.assertTrue(manager.check_is_well_utilized())
 
1051
 
        manager._full_enough_block_size = block._content_length + 1
 
1052
 
        self.assertFalse(manager.check_is_well_utilized())
 
1053
 
        manager._full_enough_mixed_block_size = block._content_length
 
1054
 
        self.assertTrue(manager.check_is_well_utilized())
 
1056
 
    def test_check_is_well_utilized_partial_use(self):
 
1057
 
        locations, block = self.make_block(self._texts)
 
1058
 
        manager = groupcompress._LazyGroupContentManager(block)
 
1059
 
        manager._full_enough_block_size = block._content_length
 
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
1062
 
        # Just using the content from key1 and 2 is not enough to be considered
 
1064
 
        self.assertFalse(manager.check_is_well_utilized())
 
1065
 
        # However if we add key3, then we have enough, as we only require 75%
 
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1068
 
        self.assertTrue(manager.check_is_well_utilized())