/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Jonathan Lange
  • Date: 2009-12-09 09:20:42 UTC
  • mfrom: (4881 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4907.
  • Revision ID: jml@canonical.com-20091209092042-s2zgqcf8f39yzxpj
Merge trunk.

--- bzrlib/tests/test_btree_index.py
+++ bzrlib/tests/test_btree_index.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -23,6 +23,9 @@
 from bzrlib import (
     btree_index,
     errors,
+    fifo_cache,
+    lru_cache,
+    osutils,
     tests,
     )
 from bzrlib.tests import (
@@ -121,6 +124,12 @@
 
 class TestBTreeBuilder(BTreeTestCase):
 
+    def test_clear_cache(self):
+        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
+        # This is a no-op, but we need the api to be consistent with other
+        # BTreeGraphIndex apis.
+        builder.clear_cache()
+
     def test_empty_1_0(self):
         builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
         # NamedTemporaryFile dies on builder.finish().read(). weird.
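
The builder-side test_clear_cache() added above only pins down that
BTreeBuilder.clear_cache() exists and does nothing, so code holding a mix of
builders and finished BTreeGraphIndex objects can drop caches without
type-checking. A minimal sketch of that calling pattern, using only names
shown in this diff (drop_index_caches itself is hypothetical):

    def drop_index_caches(indices):
        # 'indices' may mix BTreeBuilder and BTreeGraphIndex objects; after
        # this change clear_cache() is safe on both (a no-op on builders).
        for index in indices:
            index.clear_cache()
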
@@ -152,7 +161,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(158, len(content))
+        self.assertEqual(131, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
             "row_lengths=1\n",
@@ -176,7 +185,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(264, len(content))
+        self.assertEqual(238, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
             "row_lengths=1\n",
@@ -242,7 +251,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(181, len(content))
+        self.assertEqual(155, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
             "row_lengths=1\n",
@@ -350,23 +359,19 @@
         # Test the parts of the index that take up memory are doing so
         # predictably.
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
         # now back to memory
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         # And spills to a second backing index combing all
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(None, builder._backing_indices[0])
@@ -375,7 +380,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
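
The spill assertions above encode the builder's overflow behaviour: once the
in-memory _nodes reach the spill threshold they are flushed to a backing
index, and backing indices of equal size are merged, so the slots behave like
digits of a binary counter (a used slot becomes None once its contents are
combined into a larger index). A standalone sketch of that pattern, assuming
the spill threshold of two nodes implied by the assertions; this is an
illustration, not bzrlib's implementation:

    def simulate_spills(num_nodes, spill_at=2):
        nodes = []        # stands in for builder._nodes
        backing = []      # stands in for builder._backing_indices
        for i in range(num_nodes):
            nodes.append(i)
            if len(nodes) < spill_at:
                continue
            spilled, nodes = list(nodes), []
            # Merge with same-sized backing indices, leaving None behind,
            # until a free slot is found.
            for slot, existing in enumerate(backing):
                if existing is None:
                    backing[slot] = spilled
                    break
                spilled = existing + spilled
                backing[slot] = None
            else:
                backing.append(spilled)
        return nodes, backing

    # simulate_spills(4) -> ([], [None, [0, 1, 2, 3]])    "combines all"
    # simulate_spills(6) -> ([], [[4, 5], [0, 1, 2, 3]])   slot 0 reused
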
@@ -439,23 +443,19 @@
         # Test the parts of the index that take up memory are doing so
         # predictably.
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
         # now back to memory
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         # And spills to a second backing index but doesn't combine
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         for backing_index in builder._backing_indices:
@@ -464,7 +464,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(3, len(builder._backing_indices))
         for backing_index in builder._backing_indices:
@@ -529,11 +528,9 @@
         builder.add_node(*nodes[0])
         # Test the parts of the index that take up memory are doing so
         # predictably.
-        self.assertEqual(1, len(builder._keys))
         self.assertEqual(1, len(builder._nodes))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
-        self.assertEqual(0, len(builder._keys))
         self.assertEqual(0, len(builder._nodes))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
@@ -542,7 +539,6 @@
         old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIsNot(None, builder._nodes_by_key)
         self.assertNotEqual({}, builder._nodes_by_key)
         # We should have a new entry
@@ -550,7 +546,6 @@
         # And spills to a second backing index combing all
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(None, builder._backing_indices[0])
@@ -559,7 +554,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
@@ -636,6 +630,27 @@
         size = trans.put_file('index', stream)
         return btree_index.BTreeGraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        nodes = self.make_nodes(160, 2, 2)
+        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
+        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
+        self.assertEqual([1, 4], index._row_lengths)
+        self.assertIsNot(None, index._root_node)
+        internal_node_pre_clear = index._internal_node_cache.keys()
+        self.assertTrue(len(index._leaf_node_cache) > 0)
+        index.clear_cache()
+        # We don't touch _root_node or _internal_node_cache, both should be
+        # small, and can save a round trip or two
+        self.assertIsNot(None, index._root_node)
+        # NOTE: We don't want to affect the _internal_node_cache, as we expect
+        #       it will be small, and if we ever do touch this index again, it
+        #       will save round-trips.  This assertion isn't very strong,
+        #       becuase without a 3-level index, we don't have any internal
+        #       nodes cached.
+        self.assertEqual(internal_node_pre_clear,
+                         index._internal_node_cache.keys())
+        self.assertEqual(0, len(index._leaf_node_cache))
+
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
         index = btree_index.BTreeGraphIndex(transport, 'index', None)
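
The new BTreeGraphIndex.test_clear_cache() above documents that clear_cache()
drops only the leaf-node cache and leaves _root_node and the internal-node
cache alone, since those are small and save round trips. A rough usage sketch
built from the same calls the test uses (it assumes an existing writable
transport 'trans'; not part of this change):

    builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
    builder.add_node(('key',), 'value')
    size = trans.put_file('index', builder.finish())
    index = btree_index.BTreeGraphIndex(trans, 'index', size)
    list(index.iter_entries([('key',)]))   # fills the leaf-node cache
    index.clear_cache()                    # leaf cache emptied; the root and
                                           # internal nodes stay cached
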
@@ -688,7 +703,7 @@
         # The entire index should have been read, as it is one page long.
         self.assertEqual([('readv', 'index', [(0, size)], False, None)],
             transport._activity)
-        self.assertEqual(1199, size)
+        self.assertEqual(1173, size)
 
     def test__read_nodes_no_size_one_page_reads_once(self):
         self.make_index(nodes=[(('key',), 'value', ())])
@@ -742,7 +757,7 @@
         # The entire index should have been read linearly.
         self.assertEqual([('readv', 'index', [(0, size)], False, None)],
             transport._activity)
-        self.assertEqual(1514, size)
+        self.assertEqual(1488, size)
 
     def test_validate_two_pages(self):
         builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
@@ -980,6 +995,177 @@
             ])
         self.assertEqual(set([]), index.external_references(0))
 
+    def test__find_ancestors_one_page(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
+        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_one_page_w_missing(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key2: ()}, parent_map)
+        # we know that key3 is missing because we read the page that it would
+        # otherwise be on
+        self.assertEqual(set([key3]), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_one_parent_missing(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([key3],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        # all we know is that key3 wasn't present on the page we were reading
+        # but if you look, the last key is key2 which comes before key3, so we
+        # don't know whether key3 would land on this page or not.
+        self.assertEqual(set([key3]), search_keys)
+        search_keys = index._find_ancestors(search_keys, 0, parent_map,
+                                            missing_keys)
+        # passing it back in, we are sure it is 'missing'
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set([key3]), missing_keys)
+        self.assertEqual(set([]), search_keys)
+
+    def test__find_ancestors_dont_search_known(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([key3],)),
+            (key3, 'value', ([],)),
+            ])
+        # We already know about key2, so we won't try to search for key3
+        parent_map = {key2: (key3,)}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_multiple_pages(self):
+        # We need to use enough keys that we actually cause a split
+        start_time = 1249671539
+        email = "joebob@example.com"
+        nodes = []
+        ref_lists = ((),)
+        rev_keys = []
+        for i in xrange(400):
+            rev_id = '%s-%s-%s' % (email,
+                                   osutils.compact_date(start_time + i),
+                                   osutils.rand_chars(16))
+            rev_key = (rev_id,)
+            nodes.append((rev_key, 'value', ref_lists))
+            # We have a ref 'list' of length 1, with a list of parents, with 1
+            # parent which is a key
+            ref_lists = ((rev_key,),)
+            rev_keys.append(rev_key)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
+        self.assertEqual(400, index.key_count())
+        self.assertEqual(3, len(index._row_offsets))
+        nodes = dict(index._read_nodes([1, 2]))
+        l1 = nodes[1]
+        l2 = nodes[2]
+        min_l2_key = l2.min_key
+        max_l1_key = l1.max_key
+        self.assertTrue(max_l1_key < min_l2_key)
+        parents_min_l2_key = l2.keys[min_l2_key][1][0]
+        self.assertEqual((l1.max_key,), parents_min_l2_key)
+        # Now, whatever key we select that would fall on the second page,
+        # should give us all the parents until the page break
+        key_idx = rev_keys.index(min_l2_key)
+        next_key = rev_keys[key_idx+1]
+        # So now when we get the parent map, we should get the key we are
+        # looking for, min_l2_key, and then a reference to go look for the
+        # parent of that key
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([next_key], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set([max_l1_key]), search_keys)
+        parent_map = {}
+        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual(sorted(l1.keys), sorted(parent_map))
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_empty_index(self):
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual(set(), search_keys)
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
 
1132
    def test_supports_unlimited_cache(self):
 
1133
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
 
1134
        # We need enough nodes to cause a page split (so we have both an
 
1135
        # internal node and a couple leaf nodes. 500 seems to be enough.)
 
1136
        nodes = self.make_nodes(500, 1, 0)
 
1137
        for node in nodes:
 
1138
            builder.add_node(*node)
 
1139
        stream = builder.finish()
 
1140
        trans = get_transport(self.get_url())
 
1141
        size = trans.put_file('index', stream)
 
1142
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
 
1143
        self.assertEqual(500, index.key_count())
 
1144
        # We have an internal node
 
1145
        self.assertEqual(2, len(index._row_lengths))
 
1146
        # We have at least 2 leaf nodes
 
1147
        self.assertTrue(index._row_lengths[-1] >= 2)
 
1148
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
 
1149
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
 
1150
                         index._leaf_node_cache._max_cache)
 
1151
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
 
1152
        self.assertEqual(100, index._internal_node_cache._max_cache)
 
1153
        # No change if unlimited_cache=False is passed
 
1154
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
 
1155
                                            unlimited_cache=False)
 
1156
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
 
1157
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
 
1158
                         index._leaf_node_cache._max_cache)
 
1159
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
 
1160
        self.assertEqual(100, index._internal_node_cache._max_cache)
 
1161
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
 
1162
                                            unlimited_cache=True)
 
1163
        self.assertIsInstance(index._leaf_node_cache, dict)
 
1164
        self.assertIs(type(index._internal_node_cache), dict)
 
1165
        # Exercise the lookup code
 
1166
        entries = set(index.iter_entries([n[0] for n in nodes]))
 
1167
        self.assertEqual(500, len(entries))
 
1168
 
983
1169
 
984
1170
class TestBTreeNodes(BTreeTestCase):
985
1171