/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-06-12 18:05:15 UTC
  • mto: (4371.4.5 vila-better-heads)
  • mto: This revision was merged to the branch mainline in revision 4449.
  • Revision ID: john@arbash-meinel.com-20090612180515-t0cwbjsnve094oik
Add a failing test for handling nodes that are in the same linear chain.

It fails because the ancestry skipping causes us to miss the fact that the two
nodes are actually directly related. We could check at the beginning, as the
code used to do, but I think that would be incomplete for cases with more than
two heads.
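
To make the scenario concrete, here is an illustrative sketch (hypothetical code, not part of this revision): two candidate keys that lie on one linear chain must collapse to a single head, because the later key's ancestry contains the earlier one, and a search that skips over parts of the ancestry can miss that direct relationship.

    # Illustrative only -- not bzrlib code. 'b' descends directly from
    # 'a', so heads(['a', 'b']) must be set(['b']).
    parent_map = {
        'a': (),      # root of the chain
        'b': ('a',),  # 'a' and 'b' form a single linear chain
    }

    def heads(keys):
        # Exhaustive walk: any candidate that appears in another
        # candidate's ancestry is not a head.
        ancestors = set()
        for key in keys:
            todo = list(parent_map[key])
            while todo:
                parent = todo.pop()
                if parent not in ancestors:
                    ancestors.add(parent)
                    todo.extend(parent_map[parent])
        return set(keys) - ancestors

    assert heads(['a', 'b']) == set(['b'])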

=== modified file 'bzrlib/tests/test_groupcompress.py'
@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -25,11 +25,10 @@
     index as _mod_index,
     osutils,
     tests,
-    trace,
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
+from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
 
 
 def load_tests(standard_tests, module, loader):
@@ -39,7 +38,7 @@
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if compiled_groupcompress_feature.available():
+    if CompiledGroupCompressFeature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
     return tests.multiply_tests(to_adapt, scenarios, result)
@@ -135,7 +134,7 @@
 
 class TestPyrexGroupCompressor(TestGroupCompressor):
 
-    _test_needs_features = [compiled_groupcompress_feature]
+    _test_needs_features = [CompiledGroupCompressFeature]
     compressor = groupcompress.PyrexGroupCompressor
 
     def test_stats(self):
@@ -364,15 +363,6 @@
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)
 
-        # we should get the same results if using the chunked version
-        gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-                                 len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
-
     def test_partial_decomp(self):
         content_chunks = []
         # We need a sufficient amount of data so that zlib.decompress has
@@ -418,12 +408,8 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
-    def test__ensure_all_content(self):
+    def test_partial_decomp_no_known_length(self):
         content_chunks = []
-        # We need a sufficient amount of data so that zlib.decompress has
-        # partial decompression to work with. Most auto-generated data
-        # compresses a bit too well, we want a combination, so we combine a sha
-        # hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -437,13 +423,30 @@
         block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = 158634
+        block._content_length = None # Don't tell the decompressed length
         self.assertIs(None, block._content)
-        # The first _ensure_content got all of the required data
-        block._ensure_content(158634)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now lets finish
+        block._ensure_content()
         self.assertEqualDiff(content, block._content)
-        # And we should have released the _z_content_decompressor since it was
-        # fully consumed
+        # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
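
The partial-decompression tests in the hunk above depend on zlib being able to stop after a requested amount of output. A minimal sketch of that underlying mechanism (plain zlib, not bzrlib's _ensure_content; the variable names are only illustrative):

    import zlib

    content = ''.join('%d\nThis is a bit of duplicate text\n' % (i,)
                      for i in xrange(2048))
    z_content = zlib.compress(content)

    decomp = zlib.decompressobj()
    # Ask for at most 100 bytes of output; the remaining compressed input
    # is kept in decomp.unconsumed_tail.
    partial = decomp.decompress(z_content, 100)
    assert partial == content[:100]
    # Feeding the unconsumed tail back in (with no limit) finishes the job.
    rest = decomp.decompress(decomp.unconsumed_tail) + decomp.flush()
    assert partial + rest == content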
@@ -459,16 +462,14 @@
                          ], block._dump())
 
 
-class TestCaseWithGroupCompressVersionedFiles(
-        tests.TestCaseWithMemoryTransport):
+class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
-                     dir='.', inconsistency_fatal=True):
+                     dir='.'):
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength,
-            inconsistency_fatal=inconsistency_fatal)(t)
+            delta=False, keylength=keylength)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -526,7 +527,7 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_reuses_blocks(self):
+    def test_insert_record_stream_re_uses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -570,14 +571,8 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
-                record._manager._full_enough_block_size = \
-                    record._manager._block._content_length
-                yield record
-
-        vf2.insert_record_stream(small_size_stream())
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -588,44 +583,6 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
-    def test_insert_record_stream_packs_on_the_fly(self):
-        vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
-        # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
-        # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
-        # Now copy the blocks into another vf, and see that the
-        # insert_record_stream rebuilt a new block on-the-fly because of
-        # under-utilization
-        vf2 = self.make_test_vf(True, dir='target')
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
-        vf2.writer.end()
-        num_records = 0
-        # All of the records should be recombined into a single block
-        block = None
-        for record in stream:
-            num_records += 1
-            if block is None:
-                block = record._manager._block
-            else:
-                self.assertIs(block, record._manager._block)
-        self.assertEqual(8, num_records)
-
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -692,181 +649,20 @@
             frozenset([('parent-1',), ('parent-2',)]),
             index.get_missing_parents())
 
-    def make_source_with_b(self, a_parent, path):
-        source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
-        if a_parent:
-            b_parents = (('a',),)
-        else:
-            b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
-        return source
-
-    def do_inconsistent_inserts(self, inconsistency_fatal):
-        target = self.make_test_vf(True, dir='target',
-                                   inconsistency_fatal=inconsistency_fatal)
-        for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
-            target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
-
-    def test_inconsistent_redundant_inserts_warn(self):
-        """Should not insert a record that is already present."""
-        warnings = []
-        def warning(template, args):
-            warnings.append(template % args)
-        _trace_warning = trace.warning
-        trace.warning = warning
-        try:
-            self.do_inconsistent_inserts(inconsistency_fatal=False)
-        finally:
-            trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
-
-    def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
-                              inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
-
-    def test_clear_cache(self):
-        vf = self.make_source_with_b(True, 'source')
-        vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
-                                           True):
-            pass
-        self.assertTrue(len(vf._group_cache) > 0)
-        vf.clear_cache()
-        self.assertEqual(0, len(vf._group_cache))
-
-
-
-class StubGCVF(object):
-    def __init__(self, canned_get_blocks=None):
-        self._group_cache = {}
-        self._canned_get_blocks = canned_get_blocks or []
-    def _get_blocks(self, read_memos):
-        return iter(self._canned_get_blocks)
-
-
-class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
-    """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
-    def test_add_key_new_read_memo(self):
-        """Adding a key with an uncached read_memo new to this batch adds that
-        read_memo to the list of memos to fetch.
-        """
-        # locations are: index_memo, ignored, parents, ignored
-        # where index_memo is: (idx, offset, len, factory_start, factory_end)
-        # and (idx, offset, size) is known as the 'read_memo', identifying the
-        # raw bytes needed.
-        read_memo = ('fake index', 100, 50)
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_duplicate_read_memo(self):
-        """read_memos that occur multiple times in a batch will only be fetched
-        once.
-        """
-        read_memo = ('fake index', 100, 50)
-        # Two keys, both sharing the same read memo (but different overall
-        # index_memos).
-        locations = {
-            ('key1',): (read_memo + (0, 1), None, None, None),
-            ('key2',): (read_memo + (1, 2), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key1',))
-        total_size = batcher.add_key(('key2',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key1',), ('key2',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_cached_read_memo(self):
-        """Adding a key with a cached read_memo will not cause that read_memo
-        to be added to the list to fetch.
-        """
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = 'fake block'
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(0, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([], batcher.memos_to_get)
-
-    def test_yield_factories_empty(self):
-        """An empty batch yields no factories."""
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
-        self.assertEqual([], list(batcher.yield_factories()))
-
-    def test_yield_factories_calls_get_blocks(self):
-        """Uncached memos are retrieved via get_blocks."""
-        read_memo1 = ('fake index', 100, 50)
-        read_memo2 = ('fake index', 150, 40)
-        gcvf = StubGCVF(
-            canned_get_blocks=[
-                (read_memo1, groupcompress.GroupCompressBlock()),
-                (read_memo2, groupcompress.GroupCompressBlock())])
-        locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key1',))
-        batcher.add_key(('key2',))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(2, factories)
-        keys = [f.key for f in factories]
-        kinds = [f.storage_kind for f in factories]
-        self.assertEqual([('key1',), ('key2',)], keys)
-        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
-
-    def test_yield_factories_flushing(self):
-        """yield_factories holds back on yielding results from the final block
-        unless passed full_flush=True.
-        """
-        fake_block = groupcompress.GroupCompressBlock()
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = fake_block
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key',))
-        self.assertEqual([], list(batcher.yield_factories()))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(1, factories)
-        self.assertEqual(('key',), factories[0].key)
-        self.assertEqual('groupcompress-block', factories[0].storage_kind)
-
 
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -884,13 +680,6 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
-    def make_block_and_full_manager(self, texts):
-        locations, block = self.make_block(texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        for key in sorted(texts):
-            self.add_key_to_manager(key, locations, block, manager)
-        return block, manager
-
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -947,8 +736,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(34, z_header_len)
-        self.assertEqual(26, header_len)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -988,7 +777,13 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request all the keys, which ensures that we won't rebuild
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager(('key3',), locations, block, manager)
+        self.add_key_to_manager(('key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -1019,50 +814,3 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
-
-    def test_check_is_well_utilized_all_keys(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        # Though we can fake it by changing the recommended minimum size
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        # Setting it just above causes it to fail
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        # Setting the mixed-block size doesn't do anything, because the content
-        # is considered to not be 'mixed'
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertFalse(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_mixed_keys(self):
-        texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
-        block, manager = self.make_block_and_full_manager(texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_partial_use(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        # Just using the content from key1 and 2 is not enough to be considered
-        # 'complete'
-        self.assertFalse(manager.check_is_well_utilized())
-        # However if we add key3, then we have enough, as we only require 75%
-        # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
-        self.assertTrue(manager.check_is_well_utilized())
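
The removed check_is_well_utilized tests above encode a utilization policy: a block counts as well used once the content actually requested reaches a recommended minimum size, with a separate threshold for blocks that mix keys from several files, and (per the removed comment) roughly 75% of a block must actually be consumed. A minimal sketch of such a policy (hypothetical, not bzrlib's implementation):

    def check_is_well_utilized(content_length, requested_bytes,
                               full_enough_block_size):
        # A block below the recommended minimum size is never 'full enough'.
        if content_length < full_enough_block_size:
            return False
        # Otherwise require that most of the block (here 75%) was requested.
        return 4 * requested_bytes >= 3 * content_length

    # Mirrors the removed all-keys test: with the threshold equal to the
    # block's content length the check can pass, and with the threshold
    # set just above (content_length + 1) it must fail.
    assert check_is_well_utilized(1000, 1000, 1000) is True
    assert check_is_well_utilized(1000, 1000, 1001) is False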