1
# Copyright (C) 2008-2012, 2016 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
"""Tests for btree indices."""
35
TestCaseWithTransport,
43
load_tests = scenarios.load_tests_apply_scenarios
46
def btreeparser_scenarios():
47
import breezy.bzr._btree_serializer_py as py_module
48
scenarios = [('python', {'parse_btree': py_module})]
49
if compiled_btreeparser_feature.available():
50
scenarios.append(('C',
51
{'parse_btree': compiled_btreeparser_feature.module}))
55
compiled_btreeparser_feature = features.ModuleAvailableFeature(
56
'breezy.bzr._btree_serializer_pyx')
59
class BTreeTestCase(TestCaseWithTransport):
60
# test names here are suffixed by the key length and reference list count
64
super(BTreeTestCase, self).setUp()
65
self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
67
def make_nodes(self, count, key_elements, reference_lists):
68
"""Generate count*key_elements sample nodes."""
70
for prefix_pos in range(key_elements):
72
prefix = (str(prefix_pos) * 40,)
75
for pos in range(count):
76
# TODO: This creates odd keys. When count == 100,000, it
77
# creates a 240 byte key
78
key = prefix + (str(pos) * 40,)
79
value = "value:%s" % pos
81
# generate some references
83
for list_pos in range(reference_lists):
84
# as many keys in each list as its index + the key depth
85
# mod 2 - this generates both 0 length lists and
86
# ones slightly longer than the number of lists.
87
# It also ensures we have non homogeneous lists.
89
for ref_pos in range(list_pos + pos % 2):
91
# refer to a nearby key
92
refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
94
# serial of this ref in the ref list
95
refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
96
refs[-1] = tuple(refs[-1])
100
keys.append((key, value, refs))
103
def shrink_page_size(self):
104
"""Shrink the default page size so that less fits in a page."""
105
self.overrideAttr(btree_index, '_PAGE_SIZE')
106
btree_index._PAGE_SIZE = 2048
108
def assertEqualApproxCompressed(self, expected, actual, slop=6):
109
"""Check a count of compressed bytes is approximately as expected
111
Relying on compressed length being stable even with fixed inputs is
112
slightly bogus, but zlib is stable enough that this mostly works.
114
if not expected - slop < actual < expected + slop:
115
self.fail("Expected around %d bytes compressed but got %d" %
119
class TestBTreeBuilder(BTreeTestCase):
121
def test_clear_cache(self):
122
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
123
# This is a no-op, but we need the api to be consistent with other
124
# BTreeGraphIndex apis.
125
builder.clear_cache()
127
def test_empty_1_0(self):
128
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
129
# NamedTemporaryFile dies on builder.finish().read(). weird.
130
temp_file = builder.finish()
131
content = temp_file.read()
134
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
138
def test_empty_2_1(self):
139
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
140
# NamedTemporaryFile dies on builder.finish().read(). weird.
141
temp_file = builder.finish()
142
content = temp_file.read()
145
"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
149
def test_root_leaf_1_0(self):
150
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
151
nodes = self.make_nodes(5, 1, 0)
153
builder.add_node(*node)
154
# NamedTemporaryFile dies on builder.finish().read(). weird.
155
temp_file = builder.finish()
156
content = temp_file.read()
158
self.assertEqual(131, len(content))
160
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
163
node_content = content[73:]
164
node_bytes = zlib.decompress(node_content)
165
expected_node = ("type=leaf\n"
166
"0000000000000000000000000000000000000000\x00\x00value:0\n"
167
"1111111111111111111111111111111111111111\x00\x00value:1\n"
168
"2222222222222222222222222222222222222222\x00\x00value:2\n"
169
"3333333333333333333333333333333333333333\x00\x00value:3\n"
170
"4444444444444444444444444444444444444444\x00\x00value:4\n")
171
self.assertEqual(expected_node, node_bytes)
173
def test_root_leaf_2_2(self):
174
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
175
nodes = self.make_nodes(5, 2, 2)
177
builder.add_node(*node)
178
# NamedTemporaryFile dies on builder.finish().read(). weird.
179
temp_file = builder.finish()
180
content = temp_file.read()
182
self.assertEqual(238, len(content))
184
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
187
node_content = content[74:]
188
node_bytes = zlib.decompress(node_content)
191
"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
192
"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
193
"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
194
"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
195
"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
196
"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
197
"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
198
"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
199
"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
200
"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
203
self.assertEqual(expected_node, node_bytes)
205
def test_2_leaves_1_0(self):
206
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
207
nodes = self.make_nodes(400, 1, 0)
209
builder.add_node(*node)
210
# NamedTemporaryFile dies on builder.finish().read(). weird.
211
temp_file = builder.finish()
212
content = temp_file.read()
214
self.assertEqualApproxCompressed(9283, len(content))
216
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
219
root = content[77:4096]
220
leaf1 = content[4096:8192]
221
leaf2 = content[8192:]
222
root_bytes = zlib.decompress(root)
226
) + ("307" * 40) + "\n"
227
self.assertEqual(expected_root, root_bytes)
228
# We already know serialisation works for leaves, check key selection:
229
leaf1_bytes = zlib.decompress(leaf1)
230
sorted_node_keys = sorted(node[0] for node in nodes)
231
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
232
self.assertEqual(231, len(node))
233
self.assertEqual(sorted_node_keys[:231], node.all_keys())
234
leaf2_bytes = zlib.decompress(leaf2)
235
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
236
self.assertEqual(400 - 231, len(node))
237
self.assertEqual(sorted_node_keys[231:], node.all_keys())
239
def test_last_page_rounded_1_layer(self):
240
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
241
nodes = self.make_nodes(10, 1, 0)
243
builder.add_node(*node)
244
# NamedTemporaryFile dies on builder.finish().read(). weird.
245
temp_file = builder.finish()
246
content = temp_file.read()
248
self.assertEqualApproxCompressed(155, len(content))
250
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
253
        # Check the last page is well formed
255
leaf2_bytes = zlib.decompress(leaf2)
256
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
257
self.assertEqual(10, len(node))
258
sorted_node_keys = sorted(node[0] for node in nodes)
259
self.assertEqual(sorted_node_keys, node.all_keys())
261
def test_last_page_not_rounded_2_layer(self):
262
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
263
nodes = self.make_nodes(400, 1, 0)
265
builder.add_node(*node)
266
# NamedTemporaryFile dies on builder.finish().read(). weird.
267
temp_file = builder.finish()
268
content = temp_file.read()
270
self.assertEqualApproxCompressed(9283, len(content))
272
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
275
# Check the last page is well formed
276
leaf2 = content[8192:]
277
leaf2_bytes = zlib.decompress(leaf2)
278
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
279
self.assertEqual(400 - 231, len(node))
280
sorted_node_keys = sorted(node[0] for node in nodes)
281
self.assertEqual(sorted_node_keys[231:], node.all_keys())
283
def test_three_level_tree_details(self):
284
# The left most pointer in the second internal node in a row should
285
        # point to the second node that the internal node is for, _not_
286
# the first, otherwise the first node overlaps with the last node of
287
# the prior internal node on that row.
288
self.shrink_page_size()
289
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
290
# 40K nodes is enough to create a two internal nodes on the second
291
# level, with a 2K page size
292
nodes = self.make_nodes(20000, 2, 2)
295
builder.add_node(*node)
296
t = transport.get_transport_from_url('trace+' + self.get_url(''))
297
size = t.put_file('index', self.time(builder.finish))
299
index = btree_index.BTreeGraphIndex(t, 'index', size)
300
# Seed the metadata, we're using internal calls now.
302
self.assertEqual(3, len(index._row_lengths),
303
"Not enough rows: %r" % index._row_lengths)
304
self.assertEqual(4, len(index._row_offsets))
305
self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
306
internal_nodes = index._get_internal_nodes([0, 1, 2])
307
root_node = internal_nodes[0]
308
internal_node1 = internal_nodes[1]
309
internal_node2 = internal_nodes[2]
310
# The left most node node2 points at should be one after the right most
311
# node pointed at by node1.
312
self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
313
# The left most key of the second node pointed at by internal_node2
314
# should be its first key. We can check this by looking for its first key
315
# in the second node it points at
316
pos = index._row_offsets[2] + internal_node2.offset + 1
317
leaf = index._get_leaf_nodes([pos])[pos]
318
self.assertTrue(internal_node2.keys[0] in leaf)
320
def test_2_leaves_2_2(self):
321
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
322
nodes = self.make_nodes(100, 2, 2)
324
builder.add_node(*node)
325
# NamedTemporaryFile dies on builder.finish().read(). weird.
326
temp_file = builder.finish()
327
content = temp_file.read()
329
self.assertEqualApproxCompressed(12643, len(content))
331
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
334
root = content[77:4096]
335
leaf1 = content[4096:8192]
336
leaf2 = content[8192:12288]
337
leaf3 = content[12288:]
338
root_bytes = zlib.decompress(root)
342
+ ("0" * 40) + "\x00" + ("91" * 40) + "\n"
343
+ ("1" * 40) + "\x00" + ("81" * 40) + "\n"
345
self.assertEqual(expected_root, root_bytes)
346
# We assume the other leaf nodes have been written correctly - layering
349
def test_spill_index_stress_1_1(self):
350
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
351
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
352
builder.add_node(*nodes[0])
353
# Test the parts of the index that take up memory are doing so
355
self.assertEqual(1, len(builder._nodes))
356
self.assertIs(None, builder._nodes_by_key)
357
builder.add_node(*nodes[1])
358
self.assertEqual(0, len(builder._nodes))
359
self.assertIs(None, builder._nodes_by_key)
360
self.assertEqual(1, len(builder._backing_indices))
361
self.assertEqual(2, builder._backing_indices[0].key_count())
363
builder.add_node(*nodes[2])
364
self.assertEqual(1, len(builder._nodes))
365
self.assertIs(None, builder._nodes_by_key)
366
        # And spills to a second backing index combining all
367
builder.add_node(*nodes[3])
368
self.assertEqual(0, len(builder._nodes))
369
self.assertIs(None, builder._nodes_by_key)
370
self.assertEqual(2, len(builder._backing_indices))
371
self.assertEqual(None, builder._backing_indices[0])
372
self.assertEqual(4, builder._backing_indices[1].key_count())
373
# The next spills to the 2-len slot
374
builder.add_node(*nodes[4])
375
builder.add_node(*nodes[5])
376
self.assertEqual(0, len(builder._nodes))
377
self.assertIs(None, builder._nodes_by_key)
378
self.assertEqual(2, len(builder._backing_indices))
379
self.assertEqual(2, builder._backing_indices[0].key_count())
380
self.assertEqual(4, builder._backing_indices[1].key_count())
381
# Next spill combines
382
builder.add_node(*nodes[6])
383
builder.add_node(*nodes[7])
384
self.assertEqual(3, len(builder._backing_indices))
385
self.assertEqual(None, builder._backing_indices[0])
386
self.assertEqual(None, builder._backing_indices[1])
387
self.assertEqual(8, builder._backing_indices[2].key_count())
388
# And so forth - counting up in binary.
389
builder.add_node(*nodes[8])
390
builder.add_node(*nodes[9])
391
self.assertEqual(3, len(builder._backing_indices))
392
self.assertEqual(2, builder._backing_indices[0].key_count())
393
self.assertEqual(None, builder._backing_indices[1])
394
self.assertEqual(8, builder._backing_indices[2].key_count())
395
builder.add_node(*nodes[10])
396
builder.add_node(*nodes[11])
397
self.assertEqual(3, len(builder._backing_indices))
398
self.assertEqual(None, builder._backing_indices[0])
399
self.assertEqual(4, builder._backing_indices[1].key_count())
400
self.assertEqual(8, builder._backing_indices[2].key_count())
401
builder.add_node(*nodes[12])
402
# Test that memory and disk are both used for query methods; and that
403
# None is skipped over happily.
404
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
405
list(builder.iter_all_entries()))
406
# Two nodes - one memory one disk
407
self.assertEqual({(builder,) + node for node in nodes[11:13]},
408
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
409
self.assertEqual(13, builder.key_count())
410
self.assertEqual({(builder,) + node for node in nodes[11:13]},
411
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
412
builder.add_node(*nodes[13])
413
self.assertEqual(3, len(builder._backing_indices))
414
self.assertEqual(2, builder._backing_indices[0].key_count())
415
self.assertEqual(4, builder._backing_indices[1].key_count())
416
self.assertEqual(8, builder._backing_indices[2].key_count())
417
builder.add_node(*nodes[14])
418
builder.add_node(*nodes[15])
419
self.assertEqual(4, len(builder._backing_indices))
420
self.assertEqual(None, builder._backing_indices[0])
421
self.assertEqual(None, builder._backing_indices[1])
422
self.assertEqual(None, builder._backing_indices[2])
423
self.assertEqual(16, builder._backing_indices[3].key_count())
424
# Now finish, and check we got a correctly ordered tree
425
t = self.get_transport('')
426
size = t.put_file('index', builder.finish())
427
index = btree_index.BTreeGraphIndex(t, 'index', size)
428
nodes = list(index.iter_all_entries())
429
self.assertEqual(sorted(nodes), nodes)
430
self.assertEqual(16, len(nodes))
432
def test_spill_index_stress_1_1_no_combine(self):
433
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
434
builder.set_optimize(for_size=False, combine_backing_indices=False)
435
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
436
builder.add_node(*nodes[0])
437
# Test the parts of the index that take up memory are doing so
439
self.assertEqual(1, len(builder._nodes))
440
self.assertIs(None, builder._nodes_by_key)
441
builder.add_node(*nodes[1])
442
self.assertEqual(0, len(builder._nodes))
443
self.assertIs(None, builder._nodes_by_key)
444
self.assertEqual(1, len(builder._backing_indices))
445
self.assertEqual(2, builder._backing_indices[0].key_count())
447
builder.add_node(*nodes[2])
448
self.assertEqual(1, len(builder._nodes))
449
self.assertIs(None, builder._nodes_by_key)
450
# And spills to a second backing index but doesn't combine
451
builder.add_node(*nodes[3])
452
self.assertEqual(0, len(builder._nodes))
453
self.assertIs(None, builder._nodes_by_key)
454
self.assertEqual(2, len(builder._backing_indices))
455
for backing_index in builder._backing_indices:
456
self.assertEqual(2, backing_index.key_count())
457
# The next spills to the 3rd slot
458
builder.add_node(*nodes[4])
459
builder.add_node(*nodes[5])
460
self.assertEqual(0, len(builder._nodes))
461
self.assertIs(None, builder._nodes_by_key)
462
self.assertEqual(3, len(builder._backing_indices))
463
for backing_index in builder._backing_indices:
464
self.assertEqual(2, backing_index.key_count())
465
# Now spill a few more, and check that we don't combine
466
builder.add_node(*nodes[6])
467
builder.add_node(*nodes[7])
468
builder.add_node(*nodes[8])
469
builder.add_node(*nodes[9])
470
builder.add_node(*nodes[10])
471
builder.add_node(*nodes[11])
472
builder.add_node(*nodes[12])
473
self.assertEqual(6, len(builder._backing_indices))
474
for backing_index in builder._backing_indices:
475
self.assertEqual(2, backing_index.key_count())
476
# Test that memory and disk are both used for query methods; and that
477
# None is skipped over happily.
478
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
479
list(builder.iter_all_entries()))
480
# Two nodes - one memory one disk
481
self.assertEqual({(builder,) + node for node in nodes[11:13]},
482
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
483
self.assertEqual(13, builder.key_count())
484
self.assertEqual({(builder,) + node for node in nodes[11:13]},
485
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
486
builder.add_node(*nodes[13])
487
builder.add_node(*nodes[14])
488
builder.add_node(*nodes[15])
489
self.assertEqual(8, len(builder._backing_indices))
490
for backing_index in builder._backing_indices:
491
self.assertEqual(2, backing_index.key_count())
492
# Now finish, and check we got a correctly ordered tree
493
transport = self.get_transport('')
494
size = transport.put_file('index', builder.finish())
495
index = btree_index.BTreeGraphIndex(transport, 'index', size)
496
nodes = list(index.iter_all_entries())
497
self.assertEqual(sorted(nodes), nodes)
498
self.assertEqual(16, len(nodes))
500
def test_set_optimize(self):
501
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
502
builder.set_optimize(for_size=True)
503
self.assertTrue(builder._optimize_for_size)
504
builder.set_optimize(for_size=False)
505
self.assertFalse(builder._optimize_for_size)
506
        # test that we can set combine_backing_indices without affecting
509
builder._optimize_for_size = obj
510
builder.set_optimize(combine_backing_indices=False)
511
self.assertFalse(builder._combine_backing_indices)
512
self.assertIs(obj, builder._optimize_for_size)
513
builder.set_optimize(combine_backing_indices=True)
514
self.assertTrue(builder._combine_backing_indices)
515
self.assertIs(obj, builder._optimize_for_size)
517
def test_spill_index_stress_2_2(self):
518
# test that references and longer keys don't confuse things.
519
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
521
nodes = self.make_nodes(16, 2, 2)
522
builder.add_node(*nodes[0])
523
# Test the parts of the index that take up memory are doing so
525
self.assertEqual(1, len(builder._nodes))
526
self.assertIs(None, builder._nodes_by_key)
527
builder.add_node(*nodes[1])
528
self.assertEqual(0, len(builder._nodes))
529
self.assertIs(None, builder._nodes_by_key)
530
self.assertEqual(1, len(builder._backing_indices))
531
self.assertEqual(2, builder._backing_indices[0].key_count())
533
old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
534
builder.add_node(*nodes[2])
535
self.assertEqual(1, len(builder._nodes))
536
self.assertIsNot(None, builder._nodes_by_key)
537
self.assertNotEqual({}, builder._nodes_by_key)
538
# We should have a new entry
539
self.assertNotEqual(old, builder._nodes_by_key)
540
        # And spills to a second backing index combining all
541
builder.add_node(*nodes[3])
542
self.assertEqual(0, len(builder._nodes))
543
self.assertIs(None, builder._nodes_by_key)
544
self.assertEqual(2, len(builder._backing_indices))
545
self.assertEqual(None, builder._backing_indices[0])
546
self.assertEqual(4, builder._backing_indices[1].key_count())
547
# The next spills to the 2-len slot
548
builder.add_node(*nodes[4])
549
builder.add_node(*nodes[5])
550
self.assertEqual(0, len(builder._nodes))
551
self.assertIs(None, builder._nodes_by_key)
552
self.assertEqual(2, len(builder._backing_indices))
553
self.assertEqual(2, builder._backing_indices[0].key_count())
554
self.assertEqual(4, builder._backing_indices[1].key_count())
555
# Next spill combines
556
builder.add_node(*nodes[6])
557
builder.add_node(*nodes[7])
558
self.assertEqual(3, len(builder._backing_indices))
559
self.assertEqual(None, builder._backing_indices[0])
560
self.assertEqual(None, builder._backing_indices[1])
561
self.assertEqual(8, builder._backing_indices[2].key_count())
562
# And so forth - counting up in binary.
563
builder.add_node(*nodes[8])
564
builder.add_node(*nodes[9])
565
self.assertEqual(3, len(builder._backing_indices))
566
self.assertEqual(2, builder._backing_indices[0].key_count())
567
self.assertEqual(None, builder._backing_indices[1])
568
self.assertEqual(8, builder._backing_indices[2].key_count())
569
builder.add_node(*nodes[10])
570
builder.add_node(*nodes[11])
571
self.assertEqual(3, len(builder._backing_indices))
572
self.assertEqual(None, builder._backing_indices[0])
573
self.assertEqual(4, builder._backing_indices[1].key_count())
574
self.assertEqual(8, builder._backing_indices[2].key_count())
575
builder.add_node(*nodes[12])
576
# Test that memory and disk are both used for query methods; and that
577
# None is skipped over happily.
578
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
579
list(builder.iter_all_entries()))
580
# Two nodes - one memory one disk
581
self.assertEqual({(builder,) + node for node in nodes[11:13]},
582
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
583
self.assertEqual(13, builder.key_count())
584
self.assertEqual({(builder,) + node for node in nodes[11:13]},
585
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
586
builder.add_node(*nodes[13])
587
self.assertEqual(3, len(builder._backing_indices))
588
self.assertEqual(2, builder._backing_indices[0].key_count())
589
self.assertEqual(4, builder._backing_indices[1].key_count())
590
self.assertEqual(8, builder._backing_indices[2].key_count())
591
builder.add_node(*nodes[14])
592
builder.add_node(*nodes[15])
593
self.assertEqual(4, len(builder._backing_indices))
594
self.assertEqual(None, builder._backing_indices[0])
595
self.assertEqual(None, builder._backing_indices[1])
596
self.assertEqual(None, builder._backing_indices[2])
597
self.assertEqual(16, builder._backing_indices[3].key_count())
598
# Now finish, and check we got a correctly ordered tree
599
transport = self.get_transport('')
600
size = transport.put_file('index', builder.finish())
601
index = btree_index.BTreeGraphIndex(transport, 'index', size)
602
nodes = list(index.iter_all_entries())
603
self.assertEqual(sorted(nodes), nodes)
604
self.assertEqual(16, len(nodes))
606
def test_spill_index_duplicate_key_caught_on_finish(self):
607
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
608
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
609
builder.add_node(*nodes[0])
610
builder.add_node(*nodes[1])
611
builder.add_node(*nodes[0])
612
self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
615
class TestBTreeIndex(BTreeTestCase):
617
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
618
builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
619
key_elements=key_elements)
620
for key, value, references in nodes:
621
builder.add_node(key, value, references)
622
stream = builder.finish()
623
trans = transport.get_transport_from_url('trace+' + self.get_url())
624
size = trans.put_file('index', stream)
625
return btree_index.BTreeGraphIndex(trans, 'index', size)
627
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
629
builder = btree_index.BTreeBuilder(key_elements=key_elements,
630
reference_lists=ref_lists)
631
builder.add_nodes(nodes)
632
transport = self.get_transport('')
633
# NamedTemporaryFile dies on builder.finish().read(). weird.
634
temp_file = builder.finish()
635
content = temp_file.read()
638
transport.put_bytes('index', (' '*offset)+content)
639
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
642
def test_clear_cache(self):
643
nodes = self.make_nodes(160, 2, 2)
644
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
645
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
646
self.assertEqual([1, 4], index._row_lengths)
647
self.assertIsNot(None, index._root_node)
648
internal_node_pre_clear = set(index._internal_node_cache)
649
self.assertTrue(len(index._leaf_node_cache) > 0)
651
# We don't touch _root_node or _internal_node_cache, both should be
652
# small, and can save a round trip or two
653
self.assertIsNot(None, index._root_node)
654
# NOTE: We don't want to affect the _internal_node_cache, as we expect
655
# it will be small, and if we ever do touch this index again, it
656
# will save round-trips. This assertion isn't very strong,
657
# becuase without a 3-level index, we don't have any internal
659
self.assertEqual(internal_node_pre_clear,
660
set(index._internal_node_cache))
661
self.assertEqual(0, len(index._leaf_node_cache))
663
def test_trivial_constructor(self):
664
t = transport.get_transport_from_url('trace+' + self.get_url(''))
665
index = btree_index.BTreeGraphIndex(t, 'index', None)
666
# Checks the page size at load, but that isn't logged yet.
667
self.assertEqual([], t._activity)
669
def test_with_size_constructor(self):
670
t = transport.get_transport_from_url('trace+' + self.get_url(''))
671
index = btree_index.BTreeGraphIndex(t, 'index', 1)
672
# Checks the page size at load, but that isn't logged yet.
673
self.assertEqual([], t._activity)
675
def test_empty_key_count_no_size(self):
676
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
677
t = transport.get_transport_from_url('trace+' + self.get_url(''))
678
t.put_file('index', builder.finish())
679
index = btree_index.BTreeGraphIndex(t, 'index', None)
681
self.assertEqual([], t._activity)
682
self.assertEqual(0, index.key_count())
683
# The entire index should have been requested (as we generally have the
684
# size available, and doing many small readvs is inappropriate).
685
# We can't tell how much was actually read here, but - check the code.
686
self.assertEqual([('get', 'index')], t._activity)
688
def test_empty_key_count(self):
689
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
690
t = transport.get_transport_from_url('trace+' + self.get_url(''))
691
size = t.put_file('index', builder.finish())
692
self.assertEqual(72, size)
693
index = btree_index.BTreeGraphIndex(t, 'index', size)
695
self.assertEqual([], t._activity)
696
self.assertEqual(0, index.key_count())
697
# The entire index should have been read, as 4K > size
698
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
701
def test_non_empty_key_count_2_2(self):
702
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
703
nodes = self.make_nodes(35, 2, 2)
705
builder.add_node(*node)
706
t = transport.get_transport_from_url('trace+' + self.get_url(''))
707
size = t.put_file('index', builder.finish())
708
index = btree_index.BTreeGraphIndex(t, 'index', size)
710
self.assertEqual([], t._activity)
711
self.assertEqual(70, index.key_count())
712
# The entire index should have been read, as it is one page long.
713
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
715
self.assertEqualApproxCompressed(1173, size)
717
def test_with_offset_no_size(self):
718
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
720
nodes=self.make_nodes(200, 1, 1))
721
index._size = None # throw away the size info
722
self.assertEqual(200, index.key_count())
724
def test_with_small_offset(self):
725
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
727
nodes=self.make_nodes(200, 1, 1))
728
self.assertEqual(200, index.key_count())
730
def test_with_large_offset(self):
731
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
733
nodes=self.make_nodes(200, 1, 1))
734
self.assertEqual(200, index.key_count())
736
def test__read_nodes_no_size_one_page_reads_once(self):
737
self.make_index(nodes=[(('key',), 'value', ())])
738
trans = transport.get_transport_from_url('trace+' + self.get_url())
739
index = btree_index.BTreeGraphIndex(trans, 'index', None)
740
del trans._activity[:]
741
nodes = dict(index._read_nodes([0]))
742
self.assertEqual({0}, set(nodes))
744
self.assertEqual([('key',)], node.all_keys())
745
self.assertEqual([('get', 'index')], trans._activity)
747
def test__read_nodes_no_size_multiple_pages(self):
748
index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
750
num_pages = index._row_offsets[-1]
751
# Reopen with a traced transport and no size
752
trans = transport.get_transport_from_url('trace+' + self.get_url())
753
index = btree_index.BTreeGraphIndex(trans, 'index', None)
754
del trans._activity[:]
755
nodes = dict(index._read_nodes([0]))
756
self.assertEqual(list(range(num_pages)), sorted(nodes))
758
def test_2_levels_key_count_2_2(self):
759
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
760
nodes = self.make_nodes(160, 2, 2)
762
builder.add_node(*node)
763
t = transport.get_transport_from_url('trace+' + self.get_url(''))
764
size = t.put_file('index', builder.finish())
765
self.assertEqualApproxCompressed(17692, size)
766
index = btree_index.BTreeGraphIndex(t, 'index', size)
768
self.assertEqual([], t._activity)
769
self.assertEqual(320, index.key_count())
770
# The entire index should not have been read.
771
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
774
def test_validate_one_page(self):
775
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
776
nodes = self.make_nodes(45, 2, 2)
778
builder.add_node(*node)
779
t = transport.get_transport_from_url('trace+' + self.get_url(''))
780
size = t.put_file('index', builder.finish())
781
index = btree_index.BTreeGraphIndex(t, 'index', size)
783
self.assertEqual([], t._activity)
785
# The entire index should have been read linearly.
786
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
788
self.assertEqualApproxCompressed(1488, size)
790
def test_validate_two_pages(self):
791
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
792
nodes = self.make_nodes(80, 2, 2)
794
builder.add_node(*node)
795
t = transport.get_transport_from_url('trace+' + self.get_url(''))
796
size = t.put_file('index', builder.finish())
797
# Root page, 2 leaf pages
798
self.assertEqualApproxCompressed(9339, size)
799
index = btree_index.BTreeGraphIndex(t, 'index', size)
801
self.assertEqual([], t._activity)
803
rem = size - 8192 # Number of remaining bytes after second block
804
# The entire index should have been read linearly.
806
[('readv', 'index', [(0, 4096)], False, None),
807
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
809
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
810
# node and make validate find them.
812
def test_eq_ne(self):
813
# two indices are equal when constructed with the same parameters:
814
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
815
t2 = self.get_transport()
817
btree_index.BTreeGraphIndex(t1, 'index', None) ==
818
btree_index.BTreeGraphIndex(t1, 'index', None))
820
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
821
btree_index.BTreeGraphIndex(t1, 'index', 20))
823
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
824
btree_index.BTreeGraphIndex(t2, 'index', 20))
826
btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
827
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
829
btree_index.BTreeGraphIndex(t1, 'index', 10) ==
830
btree_index.BTreeGraphIndex(t1, 'index', 20))
832
btree_index.BTreeGraphIndex(t1, 'index', None) !=
833
btree_index.BTreeGraphIndex(t1, 'index', None))
835
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
836
btree_index.BTreeGraphIndex(t1, 'index', 20))
838
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
839
btree_index.BTreeGraphIndex(t2, 'index', 20))
841
btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
842
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
844
btree_index.BTreeGraphIndex(t1, 'index', 10) !=
845
btree_index.BTreeGraphIndex(t1, 'index', 20))
847
def test_key_too_big(self):
848
# the size that matters here is the _compressed_ size of the key, so we can't
849
# do a simple character repeat.
850
bigKey = ''.join(map(repr, range(btree_index._PAGE_SIZE)))
851
self.assertRaises(errors.BadIndexKey,
853
nodes=[((bigKey,), 'value', ())])
855
def test_iter_all_only_root_no_size(self):
856
self.make_index(nodes=[(('key',), 'value', ())])
857
t = transport.get_transport_from_url('trace+' + self.get_url(''))
858
index = btree_index.BTreeGraphIndex(t, 'index', None)
860
self.assertEqual([(('key',), 'value')],
861
[x[1:] for x in index.iter_all_entries()])
862
self.assertEqual([('get', 'index')], t._activity)
864
def test_iter_all_entries_reads(self):
865
# iterating all entries reads the header, then does a linear
867
self.shrink_page_size()
868
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
869
# 20k nodes is enough to create a two internal nodes on the second
870
# level, with a 2K page size
871
nodes = self.make_nodes(10000, 2, 2)
873
builder.add_node(*node)
874
t = transport.get_transport_from_url('trace+' + self.get_url(''))
875
size = t.put_file('index', builder.finish())
876
page_size = btree_index._PAGE_SIZE
878
index = btree_index.BTreeGraphIndex(t, 'index', size)
880
self.assertEqual([], t._activity)
881
found_nodes = self.time(list, index.iter_all_entries())
883
for node in found_nodes:
884
self.assertTrue(node[0] is index)
885
bare_nodes.append(node[1:])
886
self.assertEqual(3, len(index._row_lengths),
887
"Not enough rows: %r" % index._row_lengths)
888
# Should be as long as the nodes we supplied
889
self.assertEqual(20000, len(found_nodes))
890
# Should have the same content
891
self.assertEqual(set(nodes), set(bare_nodes))
892
# Should have done linear scan IO up the index, ignoring
893
# the internal nodes:
894
# The entire index should have been read
895
total_pages = sum(index._row_lengths)
896
self.assertEqual(total_pages, index._row_offsets[-1])
897
self.assertEqualApproxCompressed(1303220, size)
898
# The start of the leaves
899
first_byte = index._row_offsets[-2] * page_size
901
for offset in range(first_byte, size, page_size):
902
readv_request.append((offset, page_size))
903
# The last page is truncated
904
readv_request[-1] = (readv_request[-1][0], size % page_size)
905
expected = [('readv', 'index', [(0, page_size)], False, None),
906
('readv', 'index', readv_request, False, None)]
907
if expected != t._activity:
908
self.assertEqualDiff(pprint.pformat(expected),
909
pprint.pformat(t._activity))
911
def _test_iter_entries_references_resolved(self):
912
index = self.make_index(1, nodes=[
913
(('name', ), 'data', ([('ref', ), ('ref', )], )),
914
(('ref', ), 'refdata', ([], ))])
915
self.assertEqual({(index, ('name', ), 'data', ((('ref',),('ref',)),)),
916
(index, ('ref', ), 'refdata', ((), ))},
917
set(index.iter_entries([('name',), ('ref',)])))
919
def test_iter_entries_references_2_refs_resolved(self):
920
# iterating some entries reads just the pages needed. For now, to
921
# get it working and start measuring, only 4K pages are read.
922
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
923
# 80 nodes is enough to create a two-level index.
924
nodes = self.make_nodes(160, 2, 2)
926
builder.add_node(*node)
927
t = transport.get_transport_from_url('trace+' + self.get_url(''))
928
size = t.put_file('index', builder.finish())
930
index = btree_index.BTreeGraphIndex(t, 'index', size)
932
self.assertEqual([], t._activity)
934
found_nodes = list(index.iter_entries([nodes[30][0]]))
936
for node in found_nodes:
937
self.assertTrue(node[0] is index)
938
bare_nodes.append(node[1:])
939
# Should be as long as the nodes we supplied
940
self.assertEqual(1, len(found_nodes))
941
# Should have the same content
942
self.assertEqual(nodes[30], bare_nodes[0])
943
# Should have read the root node, then one leaf page:
944
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
945
('readv', 'index', [(8192, 4096), ], False, None)],
948
def test_iter_key_prefix_1_element_key_None(self):
949
index = self.make_index()
950
self.assertRaises(errors.BadIndexKey, list,
951
index.iter_entries_prefix([(None, )]))
953
def test_iter_key_prefix_wrong_length(self):
954
index = self.make_index()
955
self.assertRaises(errors.BadIndexKey, list,
956
index.iter_entries_prefix([('foo', None)]))
957
index = self.make_index(key_elements=2)
958
self.assertRaises(errors.BadIndexKey, list,
959
index.iter_entries_prefix([('foo', )]))
960
self.assertRaises(errors.BadIndexKey, list,
961
index.iter_entries_prefix([('foo', None, None)]))
963
def test_iter_key_prefix_1_key_element_no_refs(self):
964
index = self.make_index( nodes=[
965
(('name', ), 'data', ()),
966
(('ref', ), 'refdata', ())])
967
self.assertEqual({(index, ('name', ), 'data'),
968
(index, ('ref', ), 'refdata')},
969
set(index.iter_entries_prefix([('name', ), ('ref', )])))
971
def test_iter_key_prefix_1_key_element_refs(self):
972
index = self.make_index(1, nodes=[
973
(('name', ), 'data', ([('ref', )], )),
974
(('ref', ), 'refdata', ([], ))])
975
self.assertEqual({(index, ('name', ), 'data', ((('ref',),),)),
976
(index, ('ref', ), 'refdata', ((), ))},
977
set(index.iter_entries_prefix([('name', ), ('ref', )])))
979
def test_iter_key_prefix_2_key_element_no_refs(self):
980
index = self.make_index(key_elements=2, nodes=[
981
(('name', 'fin1'), 'data', ()),
982
(('name', 'fin2'), 'beta', ()),
983
(('ref', 'erence'), 'refdata', ())])
984
self.assertEqual({(index, ('name', 'fin1'), 'data'),
985
(index, ('ref', 'erence'), 'refdata')},
986
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
987
self.assertEqual({(index, ('name', 'fin1'), 'data'),
988
(index, ('name', 'fin2'), 'beta')},
989
set(index.iter_entries_prefix([('name', None)])))
991
def test_iter_key_prefix_2_key_element_refs(self):
992
index = self.make_index(1, key_elements=2, nodes=[
993
(('name', 'fin1'), 'data', ([('ref', 'erence')], )),
994
(('name', 'fin2'), 'beta', ([], )),
995
(('ref', 'erence'), 'refdata', ([], ))])
996
self.assertEqual({(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
997
(index, ('ref', 'erence'), 'refdata', ((), ))},
998
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
999
self.assertEqual({(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
1000
(index, ('name', 'fin2'), 'beta', ((), ))},
1001
set(index.iter_entries_prefix([('name', None)])))
1003
# XXX: external_references tests are duplicated in test_index. We
1004
# probably should have per_graph_index tests...
1005
def test_external_references_no_refs(self):
1006
index = self.make_index(ref_lists=0, nodes=[])
1007
self.assertRaises(ValueError, index.external_references, 0)
1009
def test_external_references_no_results(self):
1010
index = self.make_index(ref_lists=1, nodes=[
1011
(('key',), 'value', ([],))])
1012
self.assertEqual(set(), index.external_references(0))
1014
def test_external_references_missing_ref(self):
1015
missing_key = ('missing',)
1016
index = self.make_index(ref_lists=1, nodes=[
1017
(('key',), 'value', ([missing_key],))])
1018
self.assertEqual({missing_key}, index.external_references(0))
1020
def test_external_references_multiple_ref_lists(self):
1021
missing_key = ('missing',)
1022
index = self.make_index(ref_lists=2, nodes=[
1023
(('key',), 'value', ([], [missing_key]))])
1024
self.assertEqual(set([]), index.external_references(0))
1025
self.assertEqual({missing_key}, index.external_references(1))
1027
def test_external_references_two_records(self):
1028
index = self.make_index(ref_lists=1, nodes=[
1029
(('key-1',), 'value', ([('key-2',)],)),
1030
(('key-2',), 'value', ([],)),
1032
self.assertEqual(set([]), index.external_references(0))
1034
def test__find_ancestors_one_page(self):
1037
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1038
(key1, 'value', ([key2],)),
1039
(key2, 'value', ([],)),
1042
missing_keys = set()
1043
search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1044
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1045
self.assertEqual(set(), missing_keys)
1046
self.assertEqual(set(), search_keys)
1048
def test__find_ancestors_one_page_w_missing(self):
1052
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1053
(key1, 'value', ([key2],)),
1054
(key2, 'value', ([],)),
1057
missing_keys = set()
1058
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1060
self.assertEqual({key2: ()}, parent_map)
1061
# we know that key3 is missing because we read the page that it would
1063
self.assertEqual({key3}, missing_keys)
1064
self.assertEqual(set(), search_keys)
1066
def test__find_ancestors_one_parent_missing(self):
1070
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1071
(key1, 'value', ([key2],)),
1072
(key2, 'value', ([key3],)),
1075
missing_keys = set()
1076
search_keys = index._find_ancestors([key1], 0, parent_map,
1078
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1079
self.assertEqual(set(), missing_keys)
1080
# all we know is that key3 wasn't present on the page we were reading
1081
# but if you look, the last key is key2 which comes before key3, so we
1082
# don't know whether key3 would land on this page or not.
1083
self.assertEqual({key3}, search_keys)
1084
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1086
# passing it back in, we are sure it is 'missing'
1087
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1088
self.assertEqual({key3}, missing_keys)
1089
self.assertEqual(set([]), search_keys)
1091
def test__find_ancestors_dont_search_known(self):
1095
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1096
(key1, 'value', ([key2],)),
1097
(key2, 'value', ([key3],)),
1098
(key3, 'value', ([],)),
1100
# We already know about key2, so we won't try to search for key3
1101
parent_map = {key2: (key3,)}
1102
missing_keys = set()
1103
search_keys = index._find_ancestors([key1], 0, parent_map,
1105
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1106
self.assertEqual(set(), missing_keys)
1107
self.assertEqual(set(), search_keys)
1109
def test__find_ancestors_multiple_pages(self):
1110
# We need to use enough keys that we actually cause a split
1111
start_time = 1249671539
1112
email = "joebob@example.com"
1116
for i in range(400):
1117
rev_id = '%s-%s-%s' % (email,
1118
osutils.compact_date(start_time + i),
1119
osutils.rand_chars(16))
1121
nodes.append((rev_key, 'value', ref_lists))
1122
# We have a ref 'list' of length 1, with a list of parents, with 1
1123
# parent which is a key
1124
ref_lists = ((rev_key,),)
1125
rev_keys.append(rev_key)
1126
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1127
self.assertEqual(400, index.key_count())
1128
self.assertEqual(3, len(index._row_offsets))
1129
nodes = dict(index._read_nodes([1, 2]))
1132
min_l2_key = l2.min_key
1133
max_l1_key = l1.max_key
1134
self.assertTrue(max_l1_key < min_l2_key)
1135
parents_min_l2_key = l2[min_l2_key][1][0]
1136
self.assertEqual((l1.max_key,), parents_min_l2_key)
1137
# Now, whatever key we select that would fall on the second page,
1138
# should give us all the parents until the page break
1139
key_idx = rev_keys.index(min_l2_key)
1140
next_key = rev_keys[key_idx+1]
1141
# So now when we get the parent map, we should get the key we are
1142
# looking for, min_l2_key, and then a reference to go look for the
1143
# parent of that key
1145
missing_keys = set()
1146
search_keys = index._find_ancestors([next_key], 0, parent_map,
1148
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1149
self.assertEqual(set(), missing_keys)
1150
self.assertEqual({max_l1_key}, search_keys)
1152
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1154
self.assertEqual(l1.all_keys(), sorted(parent_map))
1155
self.assertEqual(set(), missing_keys)
1156
self.assertEqual(set(), search_keys)
1158
def test__find_ancestors_empty_index(self):
1159
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1161
missing_keys = set()
1162
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1164
self.assertEqual(set(), search_keys)
1165
self.assertEqual({}, parent_map)
1166
self.assertEqual({('one',), ('two',)}, missing_keys)
1168
def test_supports_unlimited_cache(self):
1169
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1170
# We need enough nodes to cause a page split (so we have both an
1171
# internal node and a couple leaf nodes. 500 seems to be enough.)
1172
nodes = self.make_nodes(500, 1, 0)
1174
builder.add_node(*node)
1175
stream = builder.finish()
1176
trans = self.get_transport()
1177
size = trans.put_file('index', stream)
1178
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1179
self.assertEqual(500, index.key_count())
1180
# We have an internal node
1181
self.assertEqual(2, len(index._row_lengths))
1182
# We have at least 2 leaf nodes
1183
self.assertTrue(index._row_lengths[-1] >= 2)
1184
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1185
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1186
index._leaf_node_cache._max_cache)
1187
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1188
self.assertEqual(100, index._internal_node_cache._max_cache)
1189
# No change if unlimited_cache=False is passed
1190
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1191
unlimited_cache=False)
1192
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1193
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1194
index._leaf_node_cache._max_cache)
1195
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1196
self.assertEqual(100, index._internal_node_cache._max_cache)
1197
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1198
unlimited_cache=True)
1199
self.assertIsInstance(index._leaf_node_cache, dict)
1200
self.assertIs(type(index._internal_node_cache), dict)
1201
# Exercise the lookup code
1202
entries = set(index.iter_entries([n[0] for n in nodes]))
1203
self.assertEqual(500, len(entries))
1206
class TestBTreeNodes(BTreeTestCase):
    """Tests for the parsed in-memory node objects (_LeafNode/_InternalNode),
    run against both the Python and compiled btree serializers."""

    scenarios = btreeparser_scenarios()

    def setUp(self):
        super(TestBTreeNodes, self).setUp()
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)

    def test_LeafNode_1_0(self):
        node_bytes = ("type=leaf\n"
                      "0000000000000000000000000000000000000000\x00\x00value:0\n"
                      "1111111111111111111111111111111111111111\x00\x00value:1\n"
                      "2222222222222222222222222222222222222222\x00\x00value:2\n"
                      "3333333333333333333333333333333333333333\x00\x00value:3\n"
                      "4444444444444444444444444444444444444444\x00\x00value:4\n")
        node = btree_index._LeafNode(node_bytes, 1, 0)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
            }, dict(node.all_items()))

    def test_LeafNode_2_2(self):
        node_bytes = ("type=leaf\n"
                      "00\x0000\x00\t00\x00ref00\x00value:0\n"
                      "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
                      "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
                      "11\x0044\x00\t11\x00ref00\x00value:4\n"
                      ""
                      )
        node = btree_index._LeafNode(node_bytes, 2, 2)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
            ('00', '11'): ('value:1',
                           ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
            ('11', '33'): ('value:3',
                           ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
            }, dict(node.all_items()))

    def test_InternalNode_1(self):
        node_bytes = ("type=internal\n"
                      "offset=1\n"
                      "0000000000000000000000000000000000000000\n"
                      "1111111111111111111111111111111111111111\n"
                      "2222222222222222222222222222222222222222\n"
                      "3333333333333333333333333333333333333333\n"
                      "4444444444444444444444444444444444444444\n"
                      )
        node = btree_index._InternalNode(node_bytes)
        # We want to bisect to find the right children from this node, so a
        # vector is most useful.
        self.assertEqual([
            ("0000000000000000000000000000000000000000",),
            ("1111111111111111111111111111111111111111",),
            ("2222222222222222222222222222222222222222",),
            ("3333333333333333333333333333333333333333",),
            ("4444444444444444444444444444444444444444",),
            ], node.keys)
        self.assertEqual(1, node.offset)

    # NOTE(review): this duplicates test_LeafNode_2_2 above; the second
    # definition shadows the first at class-creation time.
    def test_LeafNode_2_2(self):
        node_bytes = ("type=leaf\n"
                      "00\x0000\x00\t00\x00ref00\x00value:0\n"
                      "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
                      "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
                      "11\x0044\x00\t11\x00ref00\x00value:4\n"
                      ""
                      )
        node = btree_index._LeafNode(node_bytes, 2, 2)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
            ('00', '11'): ('value:1',
                           ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
            ('11', '33'): ('value:3',
                           ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
            }, dict(node.all_items()))

    def assertFlattened(self, expected, key, value, refs):
        flat_key, flat_line = self.parse_btree._flatten_node(
            (None, key, value, refs), bool(refs))
        self.assertEqual('\x00'.join(key), flat_key)
        self.assertEqual(expected, flat_line)

    def test__flatten_node(self):
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
        self.assertFlattened('key\0tuple\0\0value str\n',
                             ('key', 'tuple'), 'value str', [])
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
                             ('key', 'tuple', 'triple'), 'value str', [])
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
                             ('k', 't', 's'), 'value str', [[('ref',)]])
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
                             ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
        self.assertFlattened(
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
            ('00', '11'), 'value:1',
            ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
        self.assertFlattened(
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
            ('11', '33'), 'value:3',
            ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
        self.assertFlattened(
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
class TestCompiledBtree(tests.TestCase):
    """Smoke test that the compiled btree serializer is present."""

    def test_exists(self):
        # This is just to let the user know if they don't have the feature
        # available
        self.requireFeature(compiled_btreeparser_feature)
class TestMultiBisectRight(tests.TestCase):
    """Tests for BTreeGraphIndex._multi_bisect_right."""

    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
        self.assertEqual(offsets,
                         btree_index.BTreeGraphIndex._multi_bisect_right(
                             search_keys, fixed_keys))

    def test_after(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])

    def test_before(self):
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])

    def test_exact(self):
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
                                    ['a', 'c'], ['a', 'b', 'c'])

    def test_inbetween(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])

    def test_mixed(self):
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
                                     (4, ['g', 'h'])],
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
                                    ['c', 'd', 'f', 'g'])
class TestExpandOffsets(tests.TestCase):
1369
def make_index(self, size, recommended_pages=None):
1370
"""Make an index with a generic size.
1372
This doesn't actually create anything on disk, it just primes a
1373
BTreeGraphIndex with the recommended information.
1375
index = btree_index.BTreeGraphIndex(
1376
transport.get_transport_from_url('memory:///'),
1377
'test-index', size=size)
1378
if recommended_pages is not None:
1379
index._recommended_pages = recommended_pages
1382
def set_cached_offsets(self, index, cached_offsets):
1383
"""Monkeypatch to give a canned answer for _get_offsets_for...()."""
1384
def _get_offsets_to_cached_pages():
1385
cached = set(cached_offsets)
1387
index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1389
def prepare_index(self, index, node_ref_lists, key_length, key_count,
1390
row_lengths, cached_offsets):
1391
"""Setup the BTreeGraphIndex with some pre-canned information."""
1392
index.node_ref_lists = node_ref_lists
1393
index._key_length = key_length
1394
index._key_count = key_count
1395
index._row_lengths = row_lengths
1396
index._compute_row_offsets()
1397
index._root_node = btree_index._InternalNode('internal\noffset=0\n')
1398
self.set_cached_offsets(index, cached_offsets)
1400
def make_100_node_index(self):
1401
index = self.make_index(4096*100, 6)
1402
# Consider we've already made a single request at the middle
1403
self.prepare_index(index, node_ref_lists=0, key_length=1,
1404
key_count=1000, row_lengths=[1, 99],
1405
cached_offsets=[0, 50])
1408
def make_1000_node_index(self):
1409
index = self.make_index(4096*1000, 6)
1410
# Pretend we've already made a single request in the middle
1411
self.prepare_index(index, node_ref_lists=0, key_length=1,
1412
key_count=90000, row_lengths=[1, 9, 990],
1413
cached_offsets=[0, 5, 500])
1416
def assertNumPages(self, expected_pages, index, size):
1418
self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1420
def assertExpandOffsets(self, expected, index, offsets):
1421
self.assertEqual(expected, index._expand_offsets(offsets),
1422
'We did not get the expected value after expanding'
1425
def test_default_recommended_pages(self):
1426
index = self.make_index(None)
1427
# local transport recommends 4096 byte reads, which is 1 page
1428
self.assertEqual(1, index._recommended_pages)
1430
def test__compute_total_pages_in_index(self):
1431
index = self.make_index(None)
1432
self.assertNumPages(1, index, 1024)
1433
self.assertNumPages(1, index, 4095)
1434
self.assertNumPages(1, index, 4096)
1435
self.assertNumPages(2, index, 4097)
1436
self.assertNumPages(2, index, 8192)
1437
self.assertNumPages(76, index, 4096*75 + 10)
1439
def test__find_layer_start_and_stop(self):
1440
index = self.make_1000_node_index()
1441
self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1442
self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1443
self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1444
self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1445
self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1446
self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1448
def test_unknown_size(self):
1449
# We should not expand if we don't know the file size
1450
index = self.make_index(None, 10)
1451
self.assertExpandOffsets([0], index, [0])
1452
self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1454
def test_more_than_recommended(self):
1455
index = self.make_index(4096*100, 2)
1456
self.assertExpandOffsets([1, 10], index, [1, 10])
1457
self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1459
def test_read_all_from_root(self):
1460
index = self.make_index(4096*10, 20)
1461
self.assertExpandOffsets(list(range(10)), index, [0])
1463
def test_read_all_when_cached(self):
1464
# We've read enough that we can grab all the rest in a single request
1465
index = self.make_index(4096*10, 5)
1466
self.prepare_index(index, node_ref_lists=0, key_length=1,
1467
key_count=1000, row_lengths=[1, 9],
1468
cached_offsets=[0, 1, 2, 5, 6])
1469
# It should fill the remaining nodes, regardless of the one requested
1470
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1471
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1472
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1474
def test_no_root_node(self):
1475
index = self.make_index(4096*10, 5)
1476
self.assertExpandOffsets([0], index, [0])
1478
def test_include_neighbors(self):
1479
index = self.make_100_node_index()
1480
# We expand in both directions, until we have at least 'recommended'
1482
self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1483
self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1484
# If we hit an 'edge' we continue in the other direction
1485
self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1486
self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1488
# Requesting many nodes will expand all locations equally
1489
self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1490
self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1493
def test_stop_at_cached(self):
1494
index = self.make_100_node_index()
1495
self.set_cached_offsets(index, [0, 10, 19])
1496
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1497
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1498
self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1499
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1500
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1501
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1503
def test_cannot_fully_expand(self):
1504
index = self.make_100_node_index()
1505
self.set_cached_offsets(index, [0, 10, 12])
1506
# We don't go into an endless loop if we are bound by cached nodes
1507
self.assertExpandOffsets([11], index, [11])
1509
def test_overlap(self):
1510
index = self.make_100_node_index()
1511
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1512
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1514
def test_stay_within_layer(self):
1515
index = self.make_1000_node_index()
1516
# When expanding a request, we won't read nodes from the next layer
1517
self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1518
self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1519
self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1520
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1521
self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1523
self.set_cached_offsets(index, [0, 4, 12])
1524
self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1525
self.assertExpandOffsets([10, 11], index, [11])
1527
def test_small_requests_unexpanded(self):
1528
index = self.make_100_node_index()
1529
self.set_cached_offsets(index, [0])
1530
self.assertExpandOffsets([1], index, [1])
1531
self.assertExpandOffsets([50], index, [50])
1532
# If we request more than one node, then we'll expand
1533
self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1535
# The first pass does not expand
1536
index = self.make_1000_node_index()
1537
self.set_cached_offsets(index, [0])
1538
self.assertExpandOffsets([1], index, [1])
1539
self.set_cached_offsets(index, [0, 1])
1540
self.assertExpandOffsets([100], index, [100])
1541
self.set_cached_offsets(index, [0, 1, 100])
1542
# But after the first depth, we will expand
1543
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1544
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1545
self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1546
self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,