615
621
builder.add_node(*nodes[0])
616
622
builder.add_node(*nodes[1])
617
623
builder.add_node(*nodes[0])
618
self.assertRaises(_mod_index.BadIndexDuplicateKey, builder.finish)
624
self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
621
627
class TestBTreeIndex(BTreeTestCase):
623
629
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
624
630
builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
625
key_elements=key_elements)
631
key_elements=key_elements)
626
632
for key, value, references in nodes:
627
633
builder.add_node(key, value, references)
628
634
stream = builder.finish()
629
trans = transport.get_transport_from_url('trace+' + self.get_url())
635
trans = get_transport('trace+' + self.get_url())
630
636
size = trans.put_file('index', stream)
631
637
return btree_index.BTreeGraphIndex(trans, 'index', size)
633
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
635
builder = btree_index.BTreeBuilder(key_elements=key_elements,
636
reference_lists=ref_lists)
637
builder.add_nodes(nodes)
638
transport = self.get_transport('')
639
# NamedTemporaryFile dies on builder.finish().read(). weird.
640
temp_file = builder.finish()
641
content = temp_file.read()
644
transport.put_bytes('index', (b' ' * offset) + content)
645
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
648
def test_clear_cache(self):
649
nodes = self.make_nodes(160, 2, 2)
650
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
651
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
652
self.assertEqual([1, 4], index._row_lengths)
653
self.assertIsNot(None, index._root_node)
654
internal_node_pre_clear = set(index._internal_node_cache)
655
self.assertTrue(len(index._leaf_node_cache) > 0)
657
# We don't touch _root_node or _internal_node_cache, both should be
658
# small, and can save a round trip or two
659
self.assertIsNot(None, index._root_node)
660
# NOTE: We don't want to affect the _internal_node_cache, as we expect
661
# it will be small, and if we ever do touch this index again, it
662
# will save round-trips. This assertion isn't very strong,
663
# becuase without a 3-level index, we don't have any internal
665
self.assertEqual(internal_node_pre_clear,
666
set(index._internal_node_cache))
667
self.assertEqual(0, len(index._leaf_node_cache))
669
639
def test_trivial_constructor(self):
670
t = transport.get_transport_from_url('trace+' + self.get_url(''))
671
index = btree_index.BTreeGraphIndex(t, 'index', None)
640
transport = get_transport('trace+' + self.get_url(''))
641
index = btree_index.BTreeGraphIndex(transport, 'index', None)
672
642
# Checks the page size at load, but that isn't logged yet.
673
self.assertEqual([], t._activity)
643
self.assertEqual([], transport._activity)
675
645
def test_with_size_constructor(self):
676
t = transport.get_transport_from_url('trace+' + self.get_url(''))
677
index = btree_index.BTreeGraphIndex(t, 'index', 1)
646
transport = get_transport('trace+' + self.get_url(''))
647
index = btree_index.BTreeGraphIndex(transport, 'index', 1)
678
648
# Checks the page size at load, but that isn't logged yet.
679
self.assertEqual([], t._activity)
649
self.assertEqual([], transport._activity)
681
651
def test_empty_key_count_no_size(self):
682
652
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
683
t = transport.get_transport_from_url('trace+' + self.get_url(''))
684
t.put_file('index', builder.finish())
685
index = btree_index.BTreeGraphIndex(t, 'index', None)
687
self.assertEqual([], t._activity)
653
transport = get_transport('trace+' + self.get_url(''))
654
transport.put_file('index', builder.finish())
655
index = btree_index.BTreeGraphIndex(transport, 'index', None)
656
del transport._activity[:]
657
self.assertEqual([], transport._activity)
688
658
self.assertEqual(0, index.key_count())
689
659
# The entire index should have been requested (as we generally have the
690
660
# size available, and doing many small readvs is inappropriate).
691
661
# We can't tell how much was actually read here, but - check the code.
692
self.assertEqual([('get', 'index')], t._activity)
662
self.assertEqual([('get', 'index')], transport._activity)
694
664
def test_empty_key_count(self):
695
665
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
696
t = transport.get_transport_from_url('trace+' + self.get_url(''))
697
size = t.put_file('index', builder.finish())
666
transport = get_transport('trace+' + self.get_url(''))
667
size = transport.put_file('index', builder.finish())
698
668
self.assertEqual(72, size)
699
index = btree_index.BTreeGraphIndex(t, 'index', size)
701
self.assertEqual([], t._activity)
669
index = btree_index.BTreeGraphIndex(transport, 'index', size)
670
del transport._activity[:]
671
self.assertEqual([], transport._activity)
702
672
self.assertEqual(0, index.key_count())
703
673
# The entire index should have been read, as 4K > size
704
674
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
707
677
def test_non_empty_key_count_2_2(self):
708
678
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
709
679
nodes = self.make_nodes(35, 2, 2)
710
680
for node in nodes:
711
681
builder.add_node(*node)
712
t = transport.get_transport_from_url('trace+' + self.get_url(''))
713
size = t.put_file('index', builder.finish())
714
index = btree_index.BTreeGraphIndex(t, 'index', size)
716
self.assertEqual([], t._activity)
682
transport = get_transport('trace+' + self.get_url(''))
683
size = transport.put_file('index', builder.finish())
684
index = btree_index.BTreeGraphIndex(transport, 'index', size)
685
del transport._activity[:]
686
self.assertEqual([], transport._activity)
717
687
self.assertEqual(70, index.key_count())
718
688
# The entire index should have been read, as it is one page long.
719
689
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
721
self.assertEqualApproxCompressed(1173, size)
723
def test_with_offset_no_size(self):
724
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
726
nodes=self.make_nodes(200, 1, 1))
727
index._size = None # throw away the size info
728
self.assertEqual(200, index.key_count())
730
def test_with_small_offset(self):
731
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
733
nodes=self.make_nodes(200, 1, 1))
734
self.assertEqual(200, index.key_count())
736
def test_with_large_offset(self):
737
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
739
nodes=self.make_nodes(200, 1, 1))
740
self.assertEqual(200, index.key_count())
691
self.assertEqual(1199, size)
742
693
def test__read_nodes_no_size_one_page_reads_once(self):
743
self.make_index(nodes=[((b'key',), b'value', ())])
744
trans = transport.get_transport_from_url('trace+' + self.get_url())
694
self.make_index(nodes=[(('key',), 'value', ())])
695
trans = get_transport('trace+' + self.get_url())
745
696
index = btree_index.BTreeGraphIndex(trans, 'index', None)
746
697
del trans._activity[:]
747
698
nodes = dict(index._read_nodes([0]))
748
self.assertEqual({0}, set(nodes))
699
self.assertEqual([0], nodes.keys())
750
self.assertEqual([(b'key',)], node.all_keys())
701
self.assertEqual([('key',)], node.keys.keys())
751
702
self.assertEqual([('get', 'index')], trans._activity)
753
704
def test__read_nodes_no_size_multiple_pages(self):
755
706
index.key_count()
756
707
num_pages = index._row_offsets[-1]
757
708
# Reopen with a traced transport and no size
758
trans = transport.get_transport_from_url('trace+' + self.get_url())
709
trans = get_transport('trace+' + self.get_url())
759
710
index = btree_index.BTreeGraphIndex(trans, 'index', None)
760
711
del trans._activity[:]
761
712
nodes = dict(index._read_nodes([0]))
762
self.assertEqual(list(range(num_pages)), sorted(nodes))
713
self.assertEqual(range(num_pages), nodes.keys())
764
715
def test_2_levels_key_count_2_2(self):
765
716
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
766
717
nodes = self.make_nodes(160, 2, 2)
767
718
for node in nodes:
768
719
builder.add_node(*node)
769
t = transport.get_transport_from_url('trace+' + self.get_url(''))
770
size = t.put_file('index', builder.finish())
771
self.assertEqualApproxCompressed(17692, size)
772
index = btree_index.BTreeGraphIndex(t, 'index', size)
774
self.assertEqual([], t._activity)
720
transport = get_transport('trace+' + self.get_url(''))
721
size = transport.put_file('index', builder.finish())
722
self.assertEqual(17692, size)
723
index = btree_index.BTreeGraphIndex(transport, 'index', size)
724
del transport._activity[:]
725
self.assertEqual([], transport._activity)
775
726
self.assertEqual(320, index.key_count())
776
727
# The entire index should not have been read.
777
728
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
780
731
def test_validate_one_page(self):
781
732
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
782
733
nodes = self.make_nodes(45, 2, 2)
783
734
for node in nodes:
784
735
builder.add_node(*node)
785
t = transport.get_transport_from_url('trace+' + self.get_url(''))
786
size = t.put_file('index', builder.finish())
787
index = btree_index.BTreeGraphIndex(t, 'index', size)
789
self.assertEqual([], t._activity)
736
transport = get_transport('trace+' + self.get_url(''))
737
size = transport.put_file('index', builder.finish())
738
index = btree_index.BTreeGraphIndex(transport, 'index', size)
739
del transport._activity[:]
740
self.assertEqual([], transport._activity)
791
742
# The entire index should have been read linearly.
792
743
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
794
self.assertEqualApproxCompressed(1488, size)
745
self.assertEqual(1514, size)
796
747
def test_validate_two_pages(self):
797
748
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
798
749
nodes = self.make_nodes(80, 2, 2)
799
750
for node in nodes:
800
751
builder.add_node(*node)
801
t = transport.get_transport_from_url('trace+' + self.get_url(''))
802
size = t.put_file('index', builder.finish())
752
transport = get_transport('trace+' + self.get_url(''))
753
size = transport.put_file('index', builder.finish())
803
754
# Root page, 2 leaf pages
804
self.assertEqualApproxCompressed(9339, size)
805
index = btree_index.BTreeGraphIndex(t, 'index', size)
807
self.assertEqual([], t._activity)
755
self.assertEqual(9339, size)
756
index = btree_index.BTreeGraphIndex(transport, 'index', size)
757
del transport._activity[:]
758
self.assertEqual([], transport._activity)
809
rem = size - 8192 # Number of remaining bytes after second block
810
760
# The entire index should have been read linearly.
812
[('readv', 'index', [(0, 4096)], False, None),
813
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
761
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
762
('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
815
764
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
816
765
# node and make validate find them.
818
767
def test_eq_ne(self):
819
768
# two indices are equal when constructed with the same parameters:
820
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
821
t2 = self.get_transport()
823
btree_index.BTreeGraphIndex(t1, 'index', None)
824
== btree_index.BTreeGraphIndex(t1, 'index', None))
826
btree_index.BTreeGraphIndex(t1, 'index', 20)
827
== btree_index.BTreeGraphIndex(t1, 'index', 20))
829
btree_index.BTreeGraphIndex(t1, 'index', 20)
830
== btree_index.BTreeGraphIndex(t2, 'index', 20))
832
btree_index.BTreeGraphIndex(t1, 'inde1', 20)
833
== btree_index.BTreeGraphIndex(t1, 'inde2', 20))
835
btree_index.BTreeGraphIndex(t1, 'index', 10)
836
== btree_index.BTreeGraphIndex(t1, 'index', 20))
838
btree_index.BTreeGraphIndex(t1, 'index', None)
839
!= btree_index.BTreeGraphIndex(t1, 'index', None))
841
btree_index.BTreeGraphIndex(t1, 'index', 20)
842
!= btree_index.BTreeGraphIndex(t1, 'index', 20))
844
btree_index.BTreeGraphIndex(t1, 'index', 20)
845
!= btree_index.BTreeGraphIndex(t2, 'index', 20))
847
btree_index.BTreeGraphIndex(t1, 'inde1', 20)
848
!= btree_index.BTreeGraphIndex(t1, 'inde2', 20))
850
btree_index.BTreeGraphIndex(t1, 'index', 10)
851
!= btree_index.BTreeGraphIndex(t1, 'index', 20))
853
def test_key_too_big(self):
854
# the size that matters here is the _compressed_ size of the key, so we can't
855
# do a simple character repeat.
856
bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE))
857
self.assertRaises(_mod_index.BadIndexKey,
859
nodes=[((bigKey,), b'value', ())])
769
transport1 = get_transport('trace+' + self.get_url(''))
770
transport2 = get_transport(self.get_url(''))
772
btree_index.BTreeGraphIndex(transport1, 'index', None) ==
773
btree_index.BTreeGraphIndex(transport1, 'index', None))
775
btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
776
btree_index.BTreeGraphIndex(transport1, 'index', 20))
778
btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
779
btree_index.BTreeGraphIndex(transport2, 'index', 20))
781
btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
782
btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
784
btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
785
btree_index.BTreeGraphIndex(transport1, 'index', 20))
787
btree_index.BTreeGraphIndex(transport1, 'index', None) !=
788
btree_index.BTreeGraphIndex(transport1, 'index', None))
790
btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
791
btree_index.BTreeGraphIndex(transport1, 'index', 20))
793
btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
794
btree_index.BTreeGraphIndex(transport2, 'index', 20))
796
btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
797
btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
799
btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
800
btree_index.BTreeGraphIndex(transport1, 'index', 20))
861
802
def test_iter_all_only_root_no_size(self):
862
self.make_index(nodes=[((b'key',), b'value', ())])
863
t = transport.get_transport_from_url('trace+' + self.get_url(''))
864
index = btree_index.BTreeGraphIndex(t, 'index', None)
866
self.assertEqual([((b'key',), b'value')],
803
self.make_index(nodes=[(('key',), 'value', ())])
804
trans = get_transport('trace+' + self.get_url(''))
805
index = btree_index.BTreeGraphIndex(trans, 'index', None)
806
del trans._activity[:]
807
self.assertEqual([(('key',), 'value')],
867
808
[x[1:] for x in index.iter_all_entries()])
868
self.assertEqual([('get', 'index')], t._activity)
809
self.assertEqual([('get', 'index')], trans._activity)
870
811
def test_iter_all_entries_reads(self):
871
812
# iterating all entries reads the header, then does a linear
940
891
self.assertEqual(nodes[30], bare_nodes[0])
941
892
# Should have read the root node, then one leaf page:
942
893
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
943
('readv', 'index', [(8192, 4096), ], False, None)],
894
('readv', 'index', [(8192, 4096), ], False, None)],
946
897
def test_iter_key_prefix_1_element_key_None(self):
947
898
index = self.make_index()
948
self.assertRaises(_mod_index.BadIndexKey, list,
949
index.iter_entries_prefix([(None, )]))
899
self.assertRaises(errors.BadIndexKey, list,
900
index.iter_entries_prefix([(None, )]))
951
902
def test_iter_key_prefix_wrong_length(self):
952
903
index = self.make_index()
953
self.assertRaises(_mod_index.BadIndexKey, list,
954
index.iter_entries_prefix([(b'foo', None)]))
904
self.assertRaises(errors.BadIndexKey, list,
905
index.iter_entries_prefix([('foo', None)]))
955
906
index = self.make_index(key_elements=2)
956
self.assertRaises(_mod_index.BadIndexKey, list,
957
index.iter_entries_prefix([(b'foo', )]))
958
self.assertRaises(_mod_index.BadIndexKey, list,
959
index.iter_entries_prefix([(b'foo', None, None)]))
907
self.assertRaises(errors.BadIndexKey, list,
908
index.iter_entries_prefix([('foo', )]))
909
self.assertRaises(errors.BadIndexKey, list,
910
index.iter_entries_prefix([('foo', None, None)]))
961
912
def test_iter_key_prefix_1_key_element_no_refs(self):
962
index = self.make_index(nodes=[
963
((b'name', ), b'data', ()),
964
((b'ref', ), b'refdata', ())])
965
self.assertEqual({(index, (b'name', ), b'data'),
966
(index, (b'ref', ), b'refdata')},
967
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
913
index = self.make_index( nodes=[
914
(('name', ), 'data', ()),
915
(('ref', ), 'refdata', ())])
916
self.assertEqual(set([(index, ('name', ), 'data'),
917
(index, ('ref', ), 'refdata')]),
918
set(index.iter_entries_prefix([('name', ), ('ref', )])))
969
920
def test_iter_key_prefix_1_key_element_refs(self):
970
921
index = self.make_index(1, nodes=[
971
((b'name', ), b'data', ([(b'ref', )], )),
972
((b'ref', ), b'refdata', ([], ))])
973
self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)),
974
(index, (b'ref', ), b'refdata', ((), ))},
975
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
922
(('name', ), 'data', ([('ref', )], )),
923
(('ref', ), 'refdata', ([], ))])
924
self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
925
(index, ('ref', ), 'refdata', ((), ))]),
926
set(index.iter_entries_prefix([('name', ), ('ref', )])))
977
928
def test_iter_key_prefix_2_key_element_no_refs(self):
978
929
index = self.make_index(key_elements=2, nodes=[
979
((b'name', b'fin1'), b'data', ()),
980
((b'name', b'fin2'), b'beta', ()),
981
((b'ref', b'erence'), b'refdata', ())])
982
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
983
(index, (b'ref', b'erence'), b'refdata')},
984
set(index.iter_entries_prefix(
985
[(b'name', b'fin1'), (b'ref', b'erence')])))
986
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
987
(index, (b'name', b'fin2'), b'beta')},
988
set(index.iter_entries_prefix([(b'name', None)])))
930
(('name', 'fin1'), 'data', ()),
931
(('name', 'fin2'), 'beta', ()),
932
(('ref', 'erence'), 'refdata', ())])
933
self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
934
(index, ('ref', 'erence'), 'refdata')]),
935
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
936
self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
937
(index, ('name', 'fin2'), 'beta')]),
938
set(index.iter_entries_prefix([('name', None)])))
990
940
def test_iter_key_prefix_2_key_element_refs(self):
991
941
index = self.make_index(1, key_elements=2, nodes=[
992
((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )),
993
((b'name', b'fin2'), b'beta', ([], )),
994
((b'ref', b'erence'), b'refdata', ([], ))])
996
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
997
(index, (b'ref', b'erence'), b'refdata', ((), ))},
998
set(index.iter_entries_prefix(
999
[(b'name', b'fin1'), (b'ref', b'erence')])))
1001
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
1002
(index, (b'name', b'fin2'), b'beta', ((), ))},
1003
set(index.iter_entries_prefix([(b'name', None)])))
942
(('name', 'fin1'), 'data', ([('ref', 'erence')], )),
943
(('name', 'fin2'), 'beta', ([], )),
944
(('ref', 'erence'), 'refdata', ([], ))])
945
self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
946
(index, ('ref', 'erence'), 'refdata', ((), ))]),
947
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
948
self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
949
(index, ('name', 'fin2'), 'beta', ((), ))]),
950
set(index.iter_entries_prefix([('name', None)])))
1005
952
# XXX: external_references tests are duplicated in test_index. We
1006
953
# probably should have per_graph_index tests...
1011
958
def test_external_references_no_results(self):
1012
959
index = self.make_index(ref_lists=1, nodes=[
1013
((b'key',), b'value', ([],))])
960
(('key',), 'value', ([],))])
1014
961
self.assertEqual(set(), index.external_references(0))
1016
963
def test_external_references_missing_ref(self):
1017
missing_key = (b'missing',)
964
missing_key = ('missing',)
1018
965
index = self.make_index(ref_lists=1, nodes=[
1019
((b'key',), b'value', ([missing_key],))])
1020
self.assertEqual({missing_key}, index.external_references(0))
966
(('key',), 'value', ([missing_key],))])
967
self.assertEqual(set([missing_key]), index.external_references(0))
1022
969
def test_external_references_multiple_ref_lists(self):
1023
missing_key = (b'missing',)
970
missing_key = ('missing',)
1024
971
index = self.make_index(ref_lists=2, nodes=[
1025
((b'key',), b'value', ([], [missing_key]))])
972
(('key',), 'value', ([], [missing_key]))])
1026
973
self.assertEqual(set([]), index.external_references(0))
1027
self.assertEqual({missing_key}, index.external_references(1))
974
self.assertEqual(set([missing_key]), index.external_references(1))
1029
976
def test_external_references_two_records(self):
1030
977
index = self.make_index(ref_lists=1, nodes=[
1031
((b'key-1',), b'value', ([(b'key-2',)],)),
1032
((b'key-2',), b'value', ([],)),
978
(('key-1',), 'value', ([('key-2',)],)),
979
(('key-2',), 'value', ([],)),
1034
981
self.assertEqual(set([]), index.external_references(0))
1036
def test__find_ancestors_one_page(self):
1039
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1040
(key1, b'value', ([key2],)),
1041
(key2, b'value', ([],)),
1044
missing_keys = set()
1045
search_keys = index._find_ancestors(
1046
[key1], 0, parent_map, missing_keys)
1047
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1048
self.assertEqual(set(), missing_keys)
1049
self.assertEqual(set(), search_keys)
1051
def test__find_ancestors_one_page_w_missing(self):
1055
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1056
(key1, b'value', ([key2],)),
1057
(key2, b'value', ([],)),
1060
missing_keys = set()
1061
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1063
self.assertEqual({key2: ()}, parent_map)
1064
# we know that key3 is missing because we read the page that it would
1066
self.assertEqual({key3}, missing_keys)
1067
self.assertEqual(set(), search_keys)
1069
def test__find_ancestors_one_parent_missing(self):
1073
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1074
(key1, b'value', ([key2],)),
1075
(key2, b'value', ([key3],)),
1078
missing_keys = set()
1079
search_keys = index._find_ancestors([key1], 0, parent_map,
1081
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1082
self.assertEqual(set(), missing_keys)
1083
# all we know is that key3 wasn't present on the page we were reading
1084
# but if you look, the last key is key2 which comes before key3, so we
1085
# don't know whether key3 would land on this page or not.
1086
self.assertEqual({key3}, search_keys)
1087
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1089
# passing it back in, we are sure it is 'missing'
1090
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1091
self.assertEqual({key3}, missing_keys)
1092
self.assertEqual(set([]), search_keys)
1094
def test__find_ancestors_dont_search_known(self):
1098
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1099
(key1, b'value', ([key2],)),
1100
(key2, b'value', ([key3],)),
1101
(key3, b'value', ([],)),
1103
# We already know about key2, so we won't try to search for key3
1104
parent_map = {key2: (key3,)}
1105
missing_keys = set()
1106
search_keys = index._find_ancestors([key1], 0, parent_map,
1108
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1109
self.assertEqual(set(), missing_keys)
1110
self.assertEqual(set(), search_keys)
1112
def test__find_ancestors_multiple_pages(self):
1113
# We need to use enough keys that we actually cause a split
1114
start_time = 1249671539
1115
email = "joebob@example.com"
1119
for i in range(400):
1120
rev_id = ('%s-%s-%s' % (email,
1121
osutils.compact_date(start_time + i),
1122
osutils.rand_chars(16))).encode('ascii')
1124
nodes.append((rev_key, b'value', ref_lists))
1125
# We have a ref 'list' of length 1, with a list of parents, with 1
1126
# parent which is a key
1127
ref_lists = ((rev_key,),)
1128
rev_keys.append(rev_key)
1129
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1130
self.assertEqual(400, index.key_count())
1131
self.assertEqual(3, len(index._row_offsets))
1132
nodes = dict(index._read_nodes([1, 2]))
1135
min_l2_key = l2.min_key
1136
max_l1_key = l1.max_key
1137
self.assertTrue(max_l1_key < min_l2_key)
1138
parents_min_l2_key = l2[min_l2_key][1][0]
1139
self.assertEqual((l1.max_key,), parents_min_l2_key)
1140
# Now, whatever key we select that would fall on the second page,
1141
# should give us all the parents until the page break
1142
key_idx = rev_keys.index(min_l2_key)
1143
next_key = rev_keys[key_idx + 1]
1144
# So now when we get the parent map, we should get the key we are
1145
# looking for, min_l2_key, and then a reference to go look for the
1146
# parent of that key
1148
missing_keys = set()
1149
search_keys = index._find_ancestors([next_key], 0, parent_map,
1151
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1152
self.assertEqual(set(), missing_keys)
1153
self.assertEqual({max_l1_key}, search_keys)
1155
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1157
self.assertEqual(l1.all_keys(), sorted(parent_map))
1158
self.assertEqual(set(), missing_keys)
1159
self.assertEqual(set(), search_keys)
1161
def test__find_ancestors_empty_index(self):
1162
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1164
missing_keys = set()
1165
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1167
self.assertEqual(set(), search_keys)
1168
self.assertEqual({}, parent_map)
1169
self.assertEqual({('one',), ('two',)}, missing_keys)
1171
def test_supports_unlimited_cache(self):
1172
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1173
# We need enough nodes to cause a page split (so we have both an
1174
# internal node and a couple leaf nodes. 500 seems to be enough.)
1175
nodes = self.make_nodes(500, 1, 0)
1177
builder.add_node(*node)
1178
stream = builder.finish()
1179
trans = self.get_transport()
1180
size = trans.put_file('index', stream)
1181
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1182
self.assertEqual(500, index.key_count())
1183
# We have an internal node
1184
self.assertEqual(2, len(index._row_lengths))
1185
# We have at least 2 leaf nodes
1186
self.assertTrue(index._row_lengths[-1] >= 2)
1187
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1188
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1189
index._leaf_node_cache._max_cache)
1190
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1191
self.assertEqual(100, index._internal_node_cache._max_cache)
1192
# No change if unlimited_cache=False is passed
1193
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1194
unlimited_cache=False)
1195
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1196
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1197
index._leaf_node_cache._max_cache)
1198
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1199
self.assertEqual(100, index._internal_node_cache._max_cache)
1200
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1201
unlimited_cache=True)
1202
self.assertIsInstance(index._leaf_node_cache, dict)
1203
self.assertIs(type(index._internal_node_cache), dict)
1204
# Exercise the lookup code
1205
entries = set(index.iter_entries([n[0] for n in nodes]))
1206
self.assertEqual(500, len(entries))
1209
984
class TestBTreeNodes(BTreeTestCase):
1211
scenarios = btreeparser_scenarios()
986
def restore_parser(self):
987
btree_index._btree_serializer = self.saved_parser
1213
989
def setUp(self):
1214
super(TestBTreeNodes, self).setUp()
1215
self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
990
BTreeTestCase.setUp(self)
991
self.saved_parser = btree_index._btree_serializer
992
self.addCleanup(self.restore_parser)
993
btree_index._btree_serializer = self.parse_btree
1217
995
def test_LeafNode_1_0(self):
1218
node_bytes = (b"type=leaf\n"
1219
b"0000000000000000000000000000000000000000\x00\x00value:0\n"
1220
b"1111111111111111111111111111111111111111\x00\x00value:1\n"
1221
b"2222222222222222222222222222222222222222\x00\x00value:2\n"
1222
b"3333333333333333333333333333333333333333\x00\x00value:3\n"
1223
b"4444444444444444444444444444444444444444\x00\x00value:4\n")
996
node_bytes = ("type=leaf\n"
997
"0000000000000000000000000000000000000000\x00\x00value:0\n"
998
"1111111111111111111111111111111111111111\x00\x00value:1\n"
999
"2222222222222222222222222222222222222222\x00\x00value:2\n"
1000
"3333333333333333333333333333333333333333\x00\x00value:3\n"
1001
"4444444444444444444444444444444444444444\x00\x00value:4\n")
1224
1002
node = btree_index._LeafNode(node_bytes, 1, 0)
1225
1003
# We do direct access, or don't care about order, to leaf nodes most of
1226
1004
# the time, so a dict is useful:
1227
1005
self.assertEqual({
1228
(b"0000000000000000000000000000000000000000",): (b"value:0", ()),
1229
(b"1111111111111111111111111111111111111111",): (b"value:1", ()),
1230
(b"2222222222222222222222222222222222222222",): (b"value:2", ()),
1231
(b"3333333333333333333333333333333333333333",): (b"value:3", ()),
1232
(b"4444444444444444444444444444444444444444",): (b"value:4", ()),
1233
}, dict(node.all_items()))
1006
("0000000000000000000000000000000000000000",): ("value:0", ()),
1007
("1111111111111111111111111111111111111111",): ("value:1", ()),
1008
("2222222222222222222222222222222222222222",): ("value:2", ()),
1009
("3333333333333333333333333333333333333333",): ("value:3", ()),
1010
("4444444444444444444444444444444444444444",): ("value:4", ()),
1235
1013
def test_LeafNode_2_2(self):
1236
node_bytes = (b"type=leaf\n"
1237
b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1238
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1239
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1240
b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1014
node_bytes = ("type=leaf\n"
1015
"00\x0000\x00\t00\x00ref00\x00value:0\n"
1016
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1017
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1018
"11\x0044\x00\t11\x00ref00\x00value:4\n"
1243
1021
node = btree_index._LeafNode(node_bytes, 2, 2)
1244
1022
# We do direct access, or don't care about order, to leaf nodes most of
1245
1023
# the time, so a dict is useful:
1246
1024
self.assertEqual({
1247
(b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1248
(b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1249
((b'00', b'ref00'), (b'01', b'ref01')))),
1250
(b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1251
((b'11', b'ref22'), (b'11', b'ref22')))),
1252
(b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1253
}, dict(node.all_items()))
1025
('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1026
('00', '11'): ('value:1',
1027
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1028
('11', '33'): ('value:3',
1029
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1030
('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1255
1033
def test_InternalNode_1(self):
1256
node_bytes = (b"type=internal\n"
1258
b"0000000000000000000000000000000000000000\n"
1259
b"1111111111111111111111111111111111111111\n"
1260
b"2222222222222222222222222222222222222222\n"
1261
b"3333333333333333333333333333333333333333\n"
1262
b"4444444444444444444444444444444444444444\n"
1034
node_bytes = ("type=internal\n"
1036
"0000000000000000000000000000000000000000\n"
1037
"1111111111111111111111111111111111111111\n"
1038
"2222222222222222222222222222222222222222\n"
1039
"3333333333333333333333333333333333333333\n"
1040
"4444444444444444444444444444444444444444\n"
1264
1042
node = btree_index._InternalNode(node_bytes)
1265
1043
# We want to bisect to find the right children from this node, so a
1266
1044
# vector is most useful.
1267
1045
self.assertEqual([
1268
(b"0000000000000000000000000000000000000000",),
1269
(b"1111111111111111111111111111111111111111",),
1270
(b"2222222222222222222222222222222222222222",),
1271
(b"3333333333333333333333333333333333333333",),
1272
(b"4444444444444444444444444444444444444444",),
1046
("0000000000000000000000000000000000000000",),
1047
("1111111111111111111111111111111111111111",),
1048
("2222222222222222222222222222222222222222",),
1049
("3333333333333333333333333333333333333333",),
1050
("4444444444444444444444444444444444444444",),
1274
1052
self.assertEqual(1, node.offset)
1276
1054
def test_LeafNode_2_2(self):
1277
node_bytes = (b"type=leaf\n"
1278
b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1279
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1280
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1281
b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1055
node_bytes = ("type=leaf\n"
1056
"00\x0000\x00\t00\x00ref00\x00value:0\n"
1057
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1058
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1059
"11\x0044\x00\t11\x00ref00\x00value:4\n"
1284
1062
node = btree_index._LeafNode(node_bytes, 2, 2)
1285
1063
# We do direct access, or don't care about order, to leaf nodes most of
1286
1064
# the time, so a dict is useful:
1287
1065
self.assertEqual({
1288
(b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1289
(b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1290
((b'00', b'ref00'), (b'01', b'ref01')))),
1291
(b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1292
((b'11', b'ref22'), (b'11', b'ref22')))),
1293
(b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1294
}, dict(node.all_items()))
1066
('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1067
('00', '11'): ('value:1',
1068
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1069
('11', '33'): ('value:3',
1070
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1071
('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1296
1074
def assertFlattened(self, expected, key, value, refs):
1297
1075
flat_key, flat_line = self.parse_btree._flatten_node(
1298
1076
(None, key, value, refs), bool(refs))
1299
self.assertEqual(b'\x00'.join(key), flat_key)
1077
self.assertEqual('\x00'.join(key), flat_key)
1300
1078
self.assertEqual(expected, flat_line)
1302
1080
def test__flatten_node(self):
1303
self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', [])
1304
self.assertFlattened(b'key\0tuple\0\0value str\n',
1305
(b'key', b'tuple'), b'value str', [])
1306
self.assertFlattened(b'key\0tuple\0triple\0\0value str\n',
1307
(b'key', b'tuple', b'triple'), b'value str', [])
1308
self.assertFlattened(b'k\0t\0s\0ref\0value str\n',
1309
(b'k', b't', b's'), b'value str', [[(b'ref',)]])
1310
self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n',
1311
(b'key', b'tuple'), b'value str', [[(b'ref', b'key')]])
1312
self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n",
1313
(b'00', b'00'), b'value:0', ((), ((b'00', b'ref00'),)))
1314
self.assertFlattened(
1315
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1316
(b'00', b'11'), b'value:1',
1317
(((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01'))))
1318
self.assertFlattened(
1319
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1320
(b'11', b'33'), b'value:3',
1321
(((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22'))))
1322
self.assertFlattened(
1323
b"11\x0044\x00\t11\x00ref00\x00value:4\n",
1324
(b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),)))
1081
self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1082
self.assertFlattened('key\0tuple\0\0value str\n',
1083
('key', 'tuple'), 'value str', [])
1084
self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1085
('key', 'tuple', 'triple'), 'value str', [])
1086
self.assertFlattened('k\0t\0s\0ref\0value str\n',
1087
('k', 't', 's'), 'value str', [[('ref',)]])
1088
self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1089
('key', 'tuple'), 'value str', [[('ref', 'key')]])
1090
self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1091
('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1092
self.assertFlattened(
1093
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1094
('00', '11'), 'value:1',
1095
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1096
self.assertFlattened(
1097
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1098
('11', '33'), 'value:3',
1099
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1100
self.assertFlattened(
1101
"11\x0044\x00\t11\x00ref00\x00value:4\n",
1102
('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
1327
1105
class TestCompiledBtree(tests.TestCase):