/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
1
# Copyright (C) 2008 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
26
    tests,
27
    )
28
from bzrlib.tests import (
29
    TestCaseWithTransport,
30
    TestScenarioApplier,
31
    adapt_tests,
32
    condition_isinstance,
33
    split_suite_by_condition,
34
    )
35
from bzrlib.transport import get_transport
36
37
38
def load_tests(standard_tests, module, loader):
39
    # parameterise the TestBTreeNodes tests
40
    node_tests, others = split_suite_by_condition(standard_tests,
41
        condition_isinstance(TestBTreeNodes))
42
    applier = TestScenarioApplier()
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
43
    import bzrlib._btree_serializer_py as py_module
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
44
    applier.scenarios = [('python', {'parse_btree': py_module})]
45
    if CompiledBtreeParserFeature.available():
46
        # Is there a way to do this that gets missing feature failures rather
47
        # than no indication to the user?
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
48
        import bzrlib._btree_serializer_c as c_module
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
49
        applier.scenarios.append(('C', {'parse_btree': c_module}))
50
    adapt_tests(node_tests, applier, others)
51
    return others
52
53
54
class _CompiledBtreeParserFeature(tests.Feature):
55
    def _probe(self):
56
        try:
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
57
            import bzrlib._btree_serializer_c
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
58
        except ImportError:
59
            return False
60
        return True
61
62
    def feature_name(self):
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
63
        return 'bzrlib._btree_serializer_c'
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
64
65
CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
66
67
68
class BTreeTestCase(TestCaseWithTransport):
69
    # test names here are suffixed by the key length and reference list count
70
    # that they test.
71
72
    def setUp(self):
73
        TestCaseWithTransport.setUp(self)
74
        self._original_header = btree_index._RESERVED_HEADER_BYTES
75
        def restore():
76
            btree_index._RESERVED_HEADER_BYTES = self._original_header
77
        self.addCleanup(restore)
78
        btree_index._RESERVED_HEADER_BYTES = 100
79
80
    def make_nodes(self, count, key_elements, reference_lists):
81
        """Generate count*key_elements sample nodes."""
82
        keys = []
83
        for prefix_pos in range(key_elements):
84
            if key_elements - 1:
85
                prefix = (str(prefix_pos) * 40,)
86
            else:
87
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
88
            for pos in xrange(count):
89
                # TODO: This creates odd keys. When count == 100,000, it
90
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
91
                key = prefix + (str(pos) * 40,)
92
                value = "value:%s" % pos
93
                if reference_lists:
94
                    # generate some references
95
                    refs = []
96
                    for list_pos in range(reference_lists):
97
                        # as many keys in each list as its index + the key depth
98
                        # mod 2 - this generates both 0 length lists and
99
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
100
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
101
                        refs.append([])
102
                        for ref_pos in range(list_pos + pos % 2):
103
                            if pos % 2:
104
                                # refer to a nearby key
105
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
106
                            else:
107
                                # serial of this ref in the ref list
108
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
109
                        refs[-1] = tuple(refs[-1])
110
                    refs = tuple(refs)
111
                else:
112
                    refs = ()
113
                keys.append((key, value, refs))
114
        return keys
115
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
116
    def shrink_page_size(self):
117
        """Shrink the default page size so that less fits in a page."""
118
        old_page_size = btree_index._PAGE_SIZE
119
        def cleanup():
120
            btree_index._PAGE_SIZE = old_page_size
121
        self.addCleanup(cleanup)
122
        btree_index._PAGE_SIZE = 2048
123
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
124
125
class TestBTreeBuilder(BTreeTestCase):
126
127
    def test_empty_1_0(self):
128
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
129
        # NamedTemporaryFile dies on builder.finish().read(). weird.
130
        temp_file = builder.finish()
131
        content = temp_file.read()
132
        del temp_file
133
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
134
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
135
            "row_lengths=\n",
136
            content)
137
138
    def test_empty_2_1(self):
139
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
140
        # NamedTemporaryFile dies on builder.finish().read(). weird.
141
        temp_file = builder.finish()
142
        content = temp_file.read()
143
        del temp_file
144
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
145
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
146
            "row_lengths=\n",
147
            content)
148
149
    def test_root_leaf_1_0(self):
150
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
151
        nodes = self.make_nodes(5, 1, 0)
152
        for node in nodes:
153
            builder.add_node(*node)
154
        # NamedTemporaryFile dies on builder.finish().read(). weird.
155
        temp_file = builder.finish()
156
        content = temp_file.read()
157
        del temp_file
158
        self.assertEqual(158, len(content))
159
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
160
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
161
            "row_lengths=1\n",
162
            content[:73])
163
        node_content = content[73:]
164
        node_bytes = zlib.decompress(node_content)
165
        expected_node = ("type=leaf\n"
166
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
167
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
168
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
169
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
170
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
171
        self.assertEqual(expected_node, node_bytes)
172
173
    def test_root_leaf_2_2(self):
174
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
175
        nodes = self.make_nodes(5, 2, 2)
176
        for node in nodes:
177
            builder.add_node(*node)
178
        # NamedTemporaryFile dies on builder.finish().read(). weird.
179
        temp_file = builder.finish()
180
        content = temp_file.read()
181
        del temp_file
182
        self.assertEqual(264, len(content))
183
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
184
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
185
            "row_lengths=1\n",
186
            content[:74])
187
        node_content = content[74:]
188
        node_bytes = zlib.decompress(node_content)
189
        expected_node = (
190
            "type=leaf\n"
191
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
192
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
193
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
194
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
195
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
196
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
197
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
198
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
199
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
200
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
201
            ""
202
            )
203
        self.assertEqual(expected_node, node_bytes)
204
205
    def test_2_leaves_1_0(self):
206
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
207
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
208
        for node in nodes:
209
            builder.add_node(*node)
210
        # NamedTemporaryFile dies on builder.finish().read(). weird.
211
        temp_file = builder.finish()
212
        content = temp_file.read()
213
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
214
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
215
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
216
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
217
            "row_lengths=1,2\n",
218
            content[:77])
219
        root = content[77:4096]
220
        leaf1 = content[4096:8192]
221
        leaf2 = content[8192:]
222
        root_bytes = zlib.decompress(root)
223
        expected_root = (
224
            "type=internal\n"
225
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
226
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
227
        self.assertEqual(expected_root, root_bytes)
228
        # We already know serialisation works for leaves, check key selection:
229
        leaf1_bytes = zlib.decompress(leaf1)
230
        sorted_node_keys = sorted(node[0] for node in nodes)
231
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
232
        self.assertEqual(231, len(node.keys))
233
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
234
        leaf2_bytes = zlib.decompress(leaf2)
235
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
236
        self.assertEqual(400 - 231, len(node.keys))
237
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
238
239
    def test_last_page_rounded_1_layer(self):
240
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
241
        nodes = self.make_nodes(10, 1, 0)
242
        for node in nodes:
243
            builder.add_node(*node)
244
        # NamedTemporaryFile dies on builder.finish().read(). weird.
245
        temp_file = builder.finish()
246
        content = temp_file.read()
247
        del temp_file
248
        self.assertEqual(181, len(content))
249
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
250
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
251
            "row_lengths=1\n",
252
            content[:74])
253
        # Check thelast page is well formed
254
        leaf2 = content[74:]
255
        leaf2_bytes = zlib.decompress(leaf2)
256
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
257
        self.assertEqual(10, len(node.keys))
258
        sorted_node_keys = sorted(node[0] for node in nodes)
259
        self.assertEqual(sorted_node_keys, sorted(node.keys))
260
261
    def test_last_page_not_rounded_2_layer(self):
262
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
263
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
264
        for node in nodes:
265
            builder.add_node(*node)
266
        # NamedTemporaryFile dies on builder.finish().read(). weird.
267
        temp_file = builder.finish()
268
        content = temp_file.read()
269
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
270
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
271
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
272
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
273
            "row_lengths=1,2\n",
274
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
275
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
276
        leaf2 = content[8192:]
277
        leaf2_bytes = zlib.decompress(leaf2)
278
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
279
        self.assertEqual(400 - 231, len(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
280
        sorted_node_keys = sorted(node[0] for node in nodes)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
281
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
282
283
    def test_three_level_tree_details(self):
284
        # The left most pointer in the second internal node in a row should
285
        # pointer to the second node that the internal node is for, _not_
286
        # the first, otherwise the first node overlaps with the last node of
287
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
288
        self.shrink_page_size()
289
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
290
        # 40K nodes is enough to create a two internal nodes on the second
291
        # level, with a 2K page size
292
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
293
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
294
        for node in nodes:
295
            builder.add_node(*node)
296
        transport = get_transport('trace+' + self.get_url(''))
3641.3.7 by John Arbash Meinel
Make it easier to profile the btree writer time
297
        size = transport.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
298
        del builder
299
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
300
        # Seed the metadata, we're using internal calls now.
301
        index.key_count()
302
        self.assertEqual(3, len(index._row_lengths),
303
            "Not enough rows: %r" % index._row_lengths)
304
        self.assertEqual(4, len(index._row_offsets))
305
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
306
        internal_nodes = index._get_internal_nodes([0, 1, 2])
307
        root_node = internal_nodes[0]
308
        internal_node1 = internal_nodes[1]
309
        internal_node2 = internal_nodes[2]
310
        # The left most node node2 points at should be one after the right most
311
        # node pointed at by node1.
312
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
313
        # The left most key of the second node pointed at by internal_node2
314
        # should be its first key. We can check this by looking for its first key
315
        # in the second node it points at
316
        pos = index._row_offsets[2] + internal_node2.offset + 1
317
        leaf = index._get_leaf_nodes([pos])[pos]
318
        self.assertTrue(internal_node2.keys[0] in leaf.keys)
319
320
    def test_2_leaves_2_2(self):
321
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
322
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
323
        for node in nodes:
324
            builder.add_node(*node)
325
        # NamedTemporaryFile dies on builder.finish().read(). weird.
326
        temp_file = builder.finish()
327
        content = temp_file.read()
328
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
329
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
330
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
331
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
332
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
333
            content[:77])
334
        root = content[77:4096]
335
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
336
        leaf2 = content[8192:12288]
337
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
338
        root_bytes = zlib.decompress(root)
339
        expected_root = (
340
            "type=internal\n"
341
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
342
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
343
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
344
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
345
        self.assertEqual(expected_root, root_bytes)
346
        # We assume the other leaf nodes have been written correctly - layering
347
        # FTW.
348
349
    def test_spill_index_stress_1_1(self):
350
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
351
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
352
        builder.add_node(*nodes[0])
353
        # Test the parts of the index that take up memory are doing so
354
        # predictably.
355
        self.assertEqual(1, len(builder._nodes))
356
        self.assertEqual(1, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
357
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
358
        builder.add_node(*nodes[1])
359
        self.assertEqual(0, len(builder._nodes))
360
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
361
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
362
        self.assertEqual(1, len(builder._backing_indices))
363
        self.assertEqual(2, builder._backing_indices[0].key_count())
364
        # now back to memory
365
        builder.add_node(*nodes[2])
366
        self.assertEqual(1, len(builder._nodes))
367
        self.assertEqual(1, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
368
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
369
        # And spills to a second backing index combing all
370
        builder.add_node(*nodes[3])
371
        self.assertEqual(0, len(builder._nodes))
372
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
373
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
374
        self.assertEqual(2, len(builder._backing_indices))
375
        self.assertEqual(None, builder._backing_indices[0])
376
        self.assertEqual(4, builder._backing_indices[1].key_count())
377
        # The next spills to the 2-len slot
378
        builder.add_node(*nodes[4])
379
        builder.add_node(*nodes[5])
380
        self.assertEqual(0, len(builder._nodes))
381
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
382
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
383
        self.assertEqual(2, len(builder._backing_indices))
384
        self.assertEqual(2, builder._backing_indices[0].key_count())
385
        self.assertEqual(4, builder._backing_indices[1].key_count())
386
        # Next spill combines
387
        builder.add_node(*nodes[6])
388
        builder.add_node(*nodes[7])
389
        self.assertEqual(3, len(builder._backing_indices))
390
        self.assertEqual(None, builder._backing_indices[0])
391
        self.assertEqual(None, builder._backing_indices[1])
392
        self.assertEqual(8, builder._backing_indices[2].key_count())
393
        # And so forth - counting up in binary.
394
        builder.add_node(*nodes[8])
395
        builder.add_node(*nodes[9])
396
        self.assertEqual(3, len(builder._backing_indices))
397
        self.assertEqual(2, builder._backing_indices[0].key_count())
398
        self.assertEqual(None, builder._backing_indices[1])
399
        self.assertEqual(8, builder._backing_indices[2].key_count())
400
        builder.add_node(*nodes[10])
401
        builder.add_node(*nodes[11])
402
        self.assertEqual(3, len(builder._backing_indices))
403
        self.assertEqual(None, builder._backing_indices[0])
404
        self.assertEqual(4, builder._backing_indices[1].key_count())
405
        self.assertEqual(8, builder._backing_indices[2].key_count())
406
        builder.add_node(*nodes[12])
407
        # Test that memory and disk are both used for query methods; and that
408
        # None is skipped over happily.
409
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
410
            list(builder.iter_all_entries()))
411
        # Two nodes - one memory one disk
412
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
413
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
414
        self.assertEqual(13, builder.key_count())
415
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
416
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
417
        builder.add_node(*nodes[13])
418
        self.assertEqual(3, len(builder._backing_indices))
419
        self.assertEqual(2, builder._backing_indices[0].key_count())
420
        self.assertEqual(4, builder._backing_indices[1].key_count())
421
        self.assertEqual(8, builder._backing_indices[2].key_count())
422
        builder.add_node(*nodes[14])
423
        builder.add_node(*nodes[15])
424
        self.assertEqual(4, len(builder._backing_indices))
425
        self.assertEqual(None, builder._backing_indices[0])
426
        self.assertEqual(None, builder._backing_indices[1])
427
        self.assertEqual(None, builder._backing_indices[2])
428
        self.assertEqual(16, builder._backing_indices[3].key_count())
429
        # Now finish, and check we got a correctly ordered tree
430
        transport = self.get_transport('')
431
        size = transport.put_file('index', builder.finish())
432
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
433
        nodes = list(index.iter_all_entries())
434
        self.assertEqual(sorted(nodes), nodes)
435
        self.assertEqual(16, len(nodes))
436
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
437
    def test_set_optimize(self):
438
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
439
        builder.set_optimize(for_size=True)
440
        self.assertTrue(builder._optimize_for_size)
441
        builder.set_optimize(for_size=False)
442
        self.assertFalse(builder._optimize_for_size)
443
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
444
    def test_spill_index_stress_2_2(self):
445
        # test that references and longer keys don't confuse things.
446
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
447
            spill_at=2)
448
        nodes = self.make_nodes(16, 2, 2)
449
        builder.add_node(*nodes[0])
450
        # Test the parts of the index that take up memory are doing so
451
        # predictably.
452
        self.assertEqual(1, len(builder._keys))
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
453
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
454
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
455
        builder.add_node(*nodes[1])
456
        self.assertEqual(0, len(builder._keys))
457
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
458
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
459
        self.assertEqual(1, len(builder._backing_indices))
460
        self.assertEqual(2, builder._backing_indices[0].key_count())
461
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
462
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
463
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
464
        self.assertEqual(1, len(builder._nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
465
        self.assertEqual(1, len(builder._keys))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
466
        self.assertIsNot(None, builder._nodes_by_key)
467
        self.assertNotEqual({}, builder._nodes_by_key)
468
        # We should have a new entry
469
        self.assertNotEqual(old, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
470
        # And spills to a second backing index combing all
471
        builder.add_node(*nodes[3])
472
        self.assertEqual(0, len(builder._nodes))
473
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
474
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
475
        self.assertEqual(2, len(builder._backing_indices))
476
        self.assertEqual(None, builder._backing_indices[0])
477
        self.assertEqual(4, builder._backing_indices[1].key_count())
478
        # The next spills to the 2-len slot
479
        builder.add_node(*nodes[4])
480
        builder.add_node(*nodes[5])
481
        self.assertEqual(0, len(builder._nodes))
482
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
483
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
484
        self.assertEqual(2, len(builder._backing_indices))
485
        self.assertEqual(2, builder._backing_indices[0].key_count())
486
        self.assertEqual(4, builder._backing_indices[1].key_count())
487
        # Next spill combines
488
        builder.add_node(*nodes[6])
489
        builder.add_node(*nodes[7])
490
        self.assertEqual(3, len(builder._backing_indices))
491
        self.assertEqual(None, builder._backing_indices[0])
492
        self.assertEqual(None, builder._backing_indices[1])
493
        self.assertEqual(8, builder._backing_indices[2].key_count())
494
        # And so forth - counting up in binary.
495
        builder.add_node(*nodes[8])
496
        builder.add_node(*nodes[9])
497
        self.assertEqual(3, len(builder._backing_indices))
498
        self.assertEqual(2, builder._backing_indices[0].key_count())
499
        self.assertEqual(None, builder._backing_indices[1])
500
        self.assertEqual(8, builder._backing_indices[2].key_count())
501
        builder.add_node(*nodes[10])
502
        builder.add_node(*nodes[11])
503
        self.assertEqual(3, len(builder._backing_indices))
504
        self.assertEqual(None, builder._backing_indices[0])
505
        self.assertEqual(4, builder._backing_indices[1].key_count())
506
        self.assertEqual(8, builder._backing_indices[2].key_count())
507
        builder.add_node(*nodes[12])
508
        # Test that memory and disk are both used for query methods; and that
509
        # None is skipped over happily.
510
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
511
            list(builder.iter_all_entries()))
512
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
513
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
514
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
515
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
516
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
517
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
518
        builder.add_node(*nodes[13])
519
        self.assertEqual(3, len(builder._backing_indices))
520
        self.assertEqual(2, builder._backing_indices[0].key_count())
521
        self.assertEqual(4, builder._backing_indices[1].key_count())
522
        self.assertEqual(8, builder._backing_indices[2].key_count())
523
        builder.add_node(*nodes[14])
524
        builder.add_node(*nodes[15])
525
        self.assertEqual(4, len(builder._backing_indices))
526
        self.assertEqual(None, builder._backing_indices[0])
527
        self.assertEqual(None, builder._backing_indices[1])
528
        self.assertEqual(None, builder._backing_indices[2])
529
        self.assertEqual(16, builder._backing_indices[3].key_count())
530
        # Now finish, and check we got a correctly ordered tree
531
        transport = self.get_transport('')
532
        size = transport.put_file('index', builder.finish())
533
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
534
        nodes = list(index.iter_all_entries())
535
        self.assertEqual(sorted(nodes), nodes)
536
        self.assertEqual(16, len(nodes))
537
538
    def test_spill_index_duplicate_key_caught_on_finish(self):
539
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
540
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
541
        builder.add_node(*nodes[0])
542
        builder.add_node(*nodes[1])
543
        builder.add_node(*nodes[0])
544
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
545
546
547
class TestBTreeIndex(BTreeTestCase):
548
549
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
550
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
551
            key_elements=key_elements)
552
        for key, value, references in nodes:
553
            builder.add_node(key, value, references)
554
        stream = builder.finish()
555
        trans = get_transport('trace+' + self.get_url())
556
        size = trans.put_file('index', stream)
557
        return btree_index.BTreeGraphIndex(trans, 'index', size)
558
559
    def test_trivial_constructor(self):
560
        transport = get_transport('trace+' + self.get_url(''))
561
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
562
        # Checks the page size at load, but that isn't logged yet.
563
        self.assertEqual([], transport._activity)
564
565
    def test_with_size_constructor(self):
566
        transport = get_transport('trace+' + self.get_url(''))
567
        index = btree_index.BTreeGraphIndex(transport, 'index', 1)
568
        # Checks the page size at load, but that isn't logged yet.
569
        self.assertEqual([], transport._activity)
570
571
    def test_empty_key_count_no_size(self):
572
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
573
        transport = get_transport('trace+' + self.get_url(''))
574
        transport.put_file('index', builder.finish())
575
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
576
        del transport._activity[:]
577
        self.assertEqual([], transport._activity)
578
        self.assertEqual(0, index.key_count())
579
        # The entire index should have been requested (as we generally have the
580
        # size available, and doing many small readvs is inappropriate).
581
        # We can't tell how much was actually read here, but - check the code.
582
        self.assertEqual([('get', 'index'),
583
            ('readv', 'index', [(0, 72)], False, None)],
584
            transport._activity)
585
586
    def test_empty_key_count(self):
587
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
588
        transport = get_transport('trace+' + self.get_url(''))
589
        size = transport.put_file('index', builder.finish())
590
        self.assertEqual(72, size)
591
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
592
        del transport._activity[:]
593
        self.assertEqual([], transport._activity)
594
        self.assertEqual(0, index.key_count())
595
        # The entire index should have been read, as 4K > size
596
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
597
            transport._activity)
598
599
    def test_non_empty_key_count_2_2(self):
600
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
601
        nodes = self.make_nodes(35, 2, 2)
602
        for node in nodes:
603
            builder.add_node(*node)
604
        transport = get_transport('trace+' + self.get_url(''))
605
        size = transport.put_file('index', builder.finish())
606
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
607
        del transport._activity[:]
608
        self.assertEqual([], transport._activity)
609
        self.assertEqual(70, index.key_count())
610
        # The entire index should have been read, as it is one page long.
611
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
612
            transport._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
613
        self.assertEqual(1199, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
614
615
    def test_2_levels_key_count_2_2(self):
616
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
617
        nodes = self.make_nodes(160, 2, 2)
618
        for node in nodes:
619
            builder.add_node(*node)
620
        transport = get_transport('trace+' + self.get_url(''))
621
        size = transport.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
622
        self.assertEqual(17692, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
623
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
624
        del transport._activity[:]
625
        self.assertEqual([], transport._activity)
626
        self.assertEqual(320, index.key_count())
627
        # The entire index should not have been read.
628
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
629
            transport._activity)
630
631
    def test_validate_one_page(self):
632
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
633
        nodes = self.make_nodes(45, 2, 2)
634
        for node in nodes:
635
            builder.add_node(*node)
636
        transport = get_transport('trace+' + self.get_url(''))
637
        size = transport.put_file('index', builder.finish())
638
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
639
        del transport._activity[:]
640
        self.assertEqual([], transport._activity)
641
        index.validate()
642
        # The entire index should have been read linearly.
643
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
644
            transport._activity)
645
        self.assertEqual(1514, size)
646
647
    def test_validate_two_pages(self):
648
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
649
        nodes = self.make_nodes(80, 2, 2)
650
        for node in nodes:
651
            builder.add_node(*node)
652
        transport = get_transport('trace+' + self.get_url(''))
653
        size = transport.put_file('index', builder.finish())
654
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
655
        self.assertEqual(9339, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
656
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
657
        del transport._activity[:]
658
        self.assertEqual([], transport._activity)
659
        index.validate()
660
        # The entire index should have been read linearly.
661
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
662
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
663
            transport._activity)
664
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
665
        # node and make validate find them.
666
667
    def test_eq_ne(self):
668
        # two indices are equal when constructed with the same parameters:
669
        transport1 = get_transport('trace+' + self.get_url(''))
670
        transport2 = get_transport(self.get_url(''))
671
        self.assertTrue(
672
            btree_index.BTreeGraphIndex(transport1, 'index', None) ==
673
            btree_index.BTreeGraphIndex(transport1, 'index', None))
674
        self.assertTrue(
675
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
676
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
677
        self.assertFalse(
678
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
679
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
680
        self.assertFalse(
681
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
682
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
683
        self.assertFalse(
684
            btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
685
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
686
        self.assertFalse(
687
            btree_index.BTreeGraphIndex(transport1, 'index', None) !=
688
            btree_index.BTreeGraphIndex(transport1, 'index', None))
689
        self.assertFalse(
690
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
691
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
692
        self.assertTrue(
693
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
694
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
695
        self.assertTrue(
696
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
697
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
698
        self.assertTrue(
699
            btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
700
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
701
702
    def test_iter_all_entries_reads(self):
703
        # iterating all entries reads the header, then does a linear
704
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
705
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
706
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
707
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
708
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
709
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
710
        for node in nodes:
711
            builder.add_node(*node)
712
        transport = get_transport('trace+' + self.get_url(''))
713
        size = transport.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
714
        self.assertEqual(1303220, size, 'number of expected bytes in the'
715
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
716
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
717
        del builder
718
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
719
        del transport._activity[:]
720
        self.assertEqual([], transport._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
721
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
722
        bare_nodes = []
723
        for node in found_nodes:
724
            self.assertTrue(node[0] is index)
725
            bare_nodes.append(node[1:])
726
        self.assertEqual(3, len(index._row_lengths),
727
            "Not enough rows: %r" % index._row_lengths)
728
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
729
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
730
        # Should have the same content
731
        self.assertEqual(set(nodes), set(bare_nodes))
732
        # Should have done linear scan IO up the index, ignoring
733
        # the internal nodes:
734
        # The entire index should have been read
735
        total_pages = sum(index._row_lengths)
736
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
737
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
738
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
739
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
740
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
741
        for offset in range(first_byte, size, page_size):
742
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
743
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
744
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
745
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
746
             ('readv',  'index', readv_request, False, None)]
747
        if expected != transport._activity:
748
            self.assertEqualDiff(pprint.pformat(expected),
749
                                 pprint.pformat(transport._activity))
750
751
    def _test_iter_entries_references_resolved(self):
752
        index = self.make_index(1, nodes=[
753
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
754
            (('ref', ), 'refdata', ([], ))])
755
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
756
            (index, ('ref', ), 'refdata', ((), ))]),
757
            set(index.iter_entries([('name',), ('ref',)])))
758
759
    def test_iter_entries_references_2_refs_resolved(self):
760
        # iterating some entries reads just the pages needed. For now, to
761
        # get it working and start measuring, only 4K pages are read.
762
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
763
        # 80 nodes is enough to create a two-level index.
764
        nodes = self.make_nodes(160, 2, 2)
765
        for node in nodes:
766
            builder.add_node(*node)
767
        transport = get_transport('trace+' + self.get_url(''))
768
        size = transport.put_file('index', builder.finish())
769
        del builder
770
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
771
        del transport._activity[:]
772
        self.assertEqual([], transport._activity)
773
        # search for one key
774
        found_nodes = list(index.iter_entries([nodes[30][0]]))
775
        bare_nodes = []
776
        for node in found_nodes:
777
            self.assertTrue(node[0] is index)
778
            bare_nodes.append(node[1:])
779
        # Should be as long as the nodes we supplied
780
        self.assertEqual(1, len(found_nodes))
781
        # Should have the same content
782
        self.assertEqual(nodes[30], bare_nodes[0])
783
        # Should have read the root node, then one leaf page:
784
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
785
             ('readv',  'index', [(8192, 4096), ], False, None)],
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
786
            transport._activity)
787
788
    def test_iter_key_prefix_1_element_key_None(self):
789
        index = self.make_index()
790
        self.assertRaises(errors.BadIndexKey, list,
791
            index.iter_entries_prefix([(None, )]))
792
793
    def test_iter_key_prefix_wrong_length(self):
794
        index = self.make_index()
795
        self.assertRaises(errors.BadIndexKey, list,
796
            index.iter_entries_prefix([('foo', None)]))
797
        index = self.make_index(key_elements=2)
798
        self.assertRaises(errors.BadIndexKey, list,
799
            index.iter_entries_prefix([('foo', )]))
800
        self.assertRaises(errors.BadIndexKey, list,
801
            index.iter_entries_prefix([('foo', None, None)]))
802
803
    def test_iter_key_prefix_1_key_element_no_refs(self):
804
        index = self.make_index( nodes=[
805
            (('name', ), 'data', ()),
806
            (('ref', ), 'refdata', ())])
807
        self.assertEqual(set([(index, ('name', ), 'data'),
808
            (index, ('ref', ), 'refdata')]),
809
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
810
811
    def test_iter_key_prefix_1_key_element_refs(self):
812
        index = self.make_index(1, nodes=[
813
            (('name', ), 'data', ([('ref', )], )),
814
            (('ref', ), 'refdata', ([], ))])
815
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
816
            (index, ('ref', ), 'refdata', ((), ))]),
817
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
818
819
    def test_iter_key_prefix_2_key_element_no_refs(self):
820
        index = self.make_index(key_elements=2, nodes=[
821
            (('name', 'fin1'), 'data', ()),
822
            (('name', 'fin2'), 'beta', ()),
823
            (('ref', 'erence'), 'refdata', ())])
824
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
825
            (index, ('ref', 'erence'), 'refdata')]),
826
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
827
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
828
            (index, ('name', 'fin2'), 'beta')]),
829
            set(index.iter_entries_prefix([('name', None)])))
830
831
    def test_iter_key_prefix_2_key_element_refs(self):
832
        index = self.make_index(1, key_elements=2, nodes=[
833
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
834
            (('name', 'fin2'), 'beta', ([], )),
835
            (('ref', 'erence'), 'refdata', ([], ))])
836
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
837
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
838
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
839
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
840
            (index, ('name', 'fin2'), 'beta', ((), ))]),
841
            set(index.iter_entries_prefix([('name', None)])))
842
843
844
class TestBTreeNodes(BTreeTestCase):
845
846
    def restore_parser(self):
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
847
        btree_index._btree_serializer = self.saved_parser
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
848
849
    def setUp(self):
850
        BTreeTestCase.setUp(self)
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
851
        self.saved_parser = btree_index._btree_serializer
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
852
        self.addCleanup(self.restore_parser)
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
853
        btree_index._btree_serializer = self.parse_btree
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
854
855
    def test_LeafNode_1_0(self):
856
        node_bytes = ("type=leaf\n"
857
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
858
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
859
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
860
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
861
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
862
        node = btree_index._LeafNode(node_bytes, 1, 0)
863
        # We do direct access, or don't care about order, to leaf nodes most of
864
        # the time, so a dict is useful:
865
        self.assertEqual({
866
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
867
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
868
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
869
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
870
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
871
            }, node.keys)
872
873
    def test_LeafNode_2_2(self):
874
        node_bytes = ("type=leaf\n"
875
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
876
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
877
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
878
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
879
            ""
880
            )
881
        node = btree_index._LeafNode(node_bytes, 2, 2)
882
        # We do direct access, or don't care about order, to leaf nodes most of
883
        # the time, so a dict is useful:
884
        self.assertEqual({
885
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
886
            ('00', '11'): ('value:1',
887
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
888
            ('11', '33'): ('value:3',
889
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
890
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
891
            }, node.keys)
892
893
    def test_InternalNode_1(self):
894
        node_bytes = ("type=internal\n"
895
            "offset=1\n"
896
            "0000000000000000000000000000000000000000\n"
897
            "1111111111111111111111111111111111111111\n"
898
            "2222222222222222222222222222222222222222\n"
899
            "3333333333333333333333333333333333333333\n"
900
            "4444444444444444444444444444444444444444\n"
901
            )
902
        node = btree_index._InternalNode(node_bytes)
903
        # We want to bisect to find the right children from this node, so a
904
        # vector is most useful.
905
        self.assertEqual([
906
            ("0000000000000000000000000000000000000000",),
907
            ("1111111111111111111111111111111111111111",),
908
            ("2222222222222222222222222222222222222222",),
909
            ("3333333333333333333333333333333333333333",),
910
            ("4444444444444444444444444444444444444444",),
911
            ], node.keys)
912
        self.assertEqual(1, node.offset)
913
914
    def test_LeafNode_2_2(self):
915
        node_bytes = ("type=leaf\n"
916
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
917
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
918
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
919
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
920
            ""
921
            )
922
        node = btree_index._LeafNode(node_bytes, 2, 2)
923
        # We do direct access, or don't care about order, to leaf nodes most of
924
        # the time, so a dict is useful:
925
        self.assertEqual({
926
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
927
            ('00', '11'): ('value:1',
928
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
929
            ('11', '33'): ('value:3',
930
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
931
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
932
            }, node.keys)
933
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
934
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
935
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
936
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
937
        self.assertEqual('\x00'.join(key), flat_key)
938
        self.assertEqual(expected, flat_line)
939
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
940
    def test__flatten_node(self):
941
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
942
        self.assertFlattened('key\0tuple\0\0value str\n',
943
                             ('key', 'tuple'), 'value str', [])
944
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
945
                             ('key', 'tuple', 'triple'), 'value str', [])
946
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
947
                             ('k', 't', 's'), 'value str', [[('ref',)]])
948
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
949
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
950
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
951
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
952
        self.assertFlattened(
953
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
954
            ('00', '11'), 'value:1',
955
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
956
        self.assertFlattened(
957
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
958
            ('11', '33'), 'value:3',
959
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
960
        self.assertFlattened(
961
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
962
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
963
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
964
965
class TestCompiledBtree(tests.TestCase):
966
967
    def test_exists(self):
968
        # This is just to let the user know if they don't have the feature
969
        # available
970
        self.requireFeature(CompiledBtreeParserFeature)
971
972
973
class TestMultiBisectRight(tests.TestCase):
974
975
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
976
        self.assertEqual(offsets,
977
                         btree_index.BTreeGraphIndex._multi_bisect_right(
978
                            search_keys, fixed_keys))
979
980
    def test_after(self):
981
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
982
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
983
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
984
985
    def test_before(self):
986
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
987
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
988
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
989
990
    def test_exact(self):
991
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
992
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
993
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
994
                                    ['a', 'c'], ['a', 'b', 'c'])
995
996
    def test_inbetween(self):
997
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
998
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
999
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1000
1001
    def test_mixed(self):
1002
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1003
                                     (4, ['g', 'h'])],
1004
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1005
                                    ['c', 'd', 'f', 'g'])