# Copyright (C) 2008-2012, 2016 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for btree indices."""
35
TestCaseWithTransport,
# Hook for the unittest loader: expand each test in this module into one
# scenario per declared variation (see btreeparser_scenarios below).
load_tests = scenarios.load_tests_apply_scenarios
46
def btreeparser_scenarios():
47
import breezy.bzr._btree_serializer_py as py_module
48
scenarios = [('python', {'parse_btree': py_module})]
49
if compiled_btreeparser_feature.available():
50
scenarios.append(('C',
51
{'parse_btree': compiled_btreeparser_feature.module}))
55
# Test feature that is present only when the compiled btree serializer
# extension module can be imported; used to add a 'C' parser scenario.
compiled_btreeparser_feature = features.ModuleAvailableFeature(
    'breezy.bzr._btree_serializer_pyx')
59
class BTreeTestCase(TestCaseWithTransport):
60
# test names here are suffixed by the key length and reference list count
64
super(BTreeTestCase, self).setUp()
65
self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
67
def make_nodes(self, count, key_elements, reference_lists):
68
"""Generate count*key_elements sample nodes."""
69
def _pos_to_key(pos, lead=b""):
70
return (lead + (b"%d" % pos) * 40,)
72
for prefix_pos in range(key_elements):
74
prefix = _pos_to_key(prefix_pos)
77
for pos in range(count):
78
# TODO: This creates odd keys. When count == 100,000, it
79
# creates a 240 byte key
80
key = prefix + _pos_to_key(pos)
81
value = b"value:%d" % pos
83
# generate some references
85
for list_pos in range(reference_lists):
86
# as many keys in each list as its index + the key depth
87
# mod 2 - this generates both 0 length lists and
88
# ones slightly longer than the number of lists.
89
# It also ensures we have non homogeneous lists.
91
for ref_pos in range(list_pos + pos % 2):
93
# refer to a nearby key
94
refs[-1].append(prefix + _pos_to_key(pos - 1, b"ref"))
96
# serial of this ref in the ref list
97
refs[-1].append(prefix + _pos_to_key(ref_pos, b"ref"))
98
refs[-1] = tuple(refs[-1])
102
keys.append((key, value, refs))
105
def shrink_page_size(self):
    """Shrink the default page size so that less fits in a page."""
    # overrideAttr records the original value so it is restored when the
    # test finishes; then shrink from the 4K default down to 2K.
    self.overrideAttr(btree_index, '_PAGE_SIZE')
    btree_index._PAGE_SIZE = 2048
110
def assertEqualApproxCompressed(self, expected, actual, slop=6):
111
"""Check a count of compressed bytes is approximately as expected
113
Relying on compressed length being stable even with fixed inputs is
114
slightly bogus, but zlib is stable enough that this mostly works.
116
if not expected - slop < actual < expected + slop:
117
self.fail("Expected around %d bytes compressed but got %d" %
121
class TestBTreeBuilder(BTreeTestCase):
123
def test_clear_cache(self):
    """clear_cache() on a builder is accepted (as a no-op)."""
    builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
    # This is a no-op, but we need the api to be consistent with other
    # BTreeGraphIndex apis.
    builder.clear_cache()
129
def test_empty_1_0(self):
130
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
131
# NamedTemporaryFile dies on builder.finish().read(). weird.
132
temp_file = builder.finish()
133
content = temp_file.read()
136
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
140
def test_empty_2_1(self):
141
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
142
# NamedTemporaryFile dies on builder.finish().read(). weird.
143
temp_file = builder.finish()
144
content = temp_file.read()
147
b"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
151
def test_root_leaf_1_0(self):
152
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
153
nodes = self.make_nodes(5, 1, 0)
155
builder.add_node(*node)
156
# NamedTemporaryFile dies on builder.finish().read(). weird.
157
temp_file = builder.finish()
158
content = temp_file.read()
160
self.assertEqual(131, len(content))
162
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
165
node_content = content[73:]
166
node_bytes = zlib.decompress(node_content)
167
expected_node = (b"type=leaf\n"
168
b"0000000000000000000000000000000000000000\x00\x00value:0\n"
169
b"1111111111111111111111111111111111111111\x00\x00value:1\n"
170
b"2222222222222222222222222222222222222222\x00\x00value:2\n"
171
b"3333333333333333333333333333333333333333\x00\x00value:3\n"
172
b"4444444444444444444444444444444444444444\x00\x00value:4\n")
173
self.assertEqual(expected_node, node_bytes)
175
def test_root_leaf_2_2(self):
176
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
177
nodes = self.make_nodes(5, 2, 2)
179
builder.add_node(*node)
180
# NamedTemporaryFile dies on builder.finish().read(). weird.
181
temp_file = builder.finish()
182
content = temp_file.read()
184
self.assertEqual(238, len(content))
186
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
189
node_content = content[74:]
190
node_bytes = zlib.decompress(node_content)
193
b"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
194
b"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
195
b"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
196
b"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
197
b"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
198
b"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
199
b"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
200
b"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
201
b"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
202
b"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
205
self.assertEqual(expected_node, node_bytes)
207
def test_2_leaves_1_0(self):
208
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
209
nodes = self.make_nodes(400, 1, 0)
211
builder.add_node(*node)
212
# NamedTemporaryFile dies on builder.finish().read(). weird.
213
temp_file = builder.finish()
214
content = temp_file.read()
216
self.assertEqualApproxCompressed(9283, len(content))
218
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
219
b"row_lengths=1,2\n",
221
root = content[77:4096]
222
leaf1 = content[4096:8192]
223
leaf2 = content[8192:]
224
root_bytes = zlib.decompress(root)
228
) + (b"307" * 40) + b"\n"
229
self.assertEqual(expected_root, root_bytes)
230
# We already know serialisation works for leaves, check key selection:
231
leaf1_bytes = zlib.decompress(leaf1)
232
sorted_node_keys = sorted(node[0] for node in nodes)
233
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
234
self.assertEqual(231, len(node))
235
self.assertEqual(sorted_node_keys[:231], node.all_keys())
236
leaf2_bytes = zlib.decompress(leaf2)
237
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
238
self.assertEqual(400 - 231, len(node))
239
self.assertEqual(sorted_node_keys[231:], node.all_keys())
241
def test_last_page_rounded_1_layer(self):
242
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
243
nodes = self.make_nodes(10, 1, 0)
245
builder.add_node(*node)
246
# NamedTemporaryFile dies on builder.finish().read(). weird.
247
temp_file = builder.finish()
248
content = temp_file.read()
250
self.assertEqualApproxCompressed(155, len(content))
252
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
255
# Check thelast page is well formed
257
leaf2_bytes = zlib.decompress(leaf2)
258
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
259
self.assertEqual(10, len(node))
260
sorted_node_keys = sorted(node[0] for node in nodes)
261
self.assertEqual(sorted_node_keys, node.all_keys())
263
def test_last_page_not_rounded_2_layer(self):
264
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
265
nodes = self.make_nodes(400, 1, 0)
267
builder.add_node(*node)
268
# NamedTemporaryFile dies on builder.finish().read(). weird.
269
temp_file = builder.finish()
270
content = temp_file.read()
272
self.assertEqualApproxCompressed(9283, len(content))
274
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
275
b"row_lengths=1,2\n",
277
# Check the last page is well formed
278
leaf2 = content[8192:]
279
leaf2_bytes = zlib.decompress(leaf2)
280
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
281
self.assertEqual(400 - 231, len(node))
282
sorted_node_keys = sorted(node[0] for node in nodes)
283
self.assertEqual(sorted_node_keys[231:], node.all_keys())
285
def test_three_level_tree_details(self):
286
# The left most pointer in the second internal node in a row should
287
# pointer to the second node that the internal node is for, _not_
288
# the first, otherwise the first node overlaps with the last node of
289
# the prior internal node on that row.
290
self.shrink_page_size()
291
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
292
# 40K nodes is enough to create a two internal nodes on the second
293
# level, with a 2K page size
294
nodes = self.make_nodes(20000, 2, 2)
297
builder.add_node(*node)
298
t = transport.get_transport_from_url('trace+' + self.get_url(''))
299
size = t.put_file('index', self.time(builder.finish))
301
index = btree_index.BTreeGraphIndex(t, 'index', size)
302
# Seed the metadata, we're using internal calls now.
304
self.assertEqual(3, len(index._row_lengths),
305
"Not enough rows: %r" % index._row_lengths)
306
self.assertEqual(4, len(index._row_offsets))
307
self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
308
internal_nodes = index._get_internal_nodes([0, 1, 2])
309
root_node = internal_nodes[0]
310
internal_node1 = internal_nodes[1]
311
internal_node2 = internal_nodes[2]
312
# The left most node node2 points at should be one after the right most
313
# node pointed at by node1.
314
self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
315
# The left most key of the second node pointed at by internal_node2
316
# should be its first key. We can check this by looking for its first key
317
# in the second node it points at
318
pos = index._row_offsets[2] + internal_node2.offset + 1
319
leaf = index._get_leaf_nodes([pos])[pos]
320
self.assertTrue(internal_node2.keys[0] in leaf)
322
def test_2_leaves_2_2(self):
323
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
324
nodes = self.make_nodes(100, 2, 2)
326
builder.add_node(*node)
327
# NamedTemporaryFile dies on builder.finish().read(). weird.
328
temp_file = builder.finish()
329
content = temp_file.read()
331
self.assertEqualApproxCompressed(12643, len(content))
333
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
334
b"row_lengths=1,3\n",
336
root = content[77:4096]
337
leaf1 = content[4096:8192]
338
leaf2 = content[8192:12288]
339
leaf3 = content[12288:]
340
root_bytes = zlib.decompress(root)
344
+ (b"0" * 40) + b"\x00" + (b"91" * 40) + b"\n"
345
+ (b"1" * 40) + b"\x00" + (b"81" * 40) + b"\n"
347
self.assertEqual(expected_root, root_bytes)
348
# We assume the other leaf nodes have been written correctly - layering
351
def test_spill_index_stress_1_1(self):
352
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
353
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
354
builder.add_node(*nodes[0])
355
# Test the parts of the index that take up memory are doing so
357
self.assertEqual(1, len(builder._nodes))
358
self.assertIs(None, builder._nodes_by_key)
359
builder.add_node(*nodes[1])
360
self.assertEqual(0, len(builder._nodes))
361
self.assertIs(None, builder._nodes_by_key)
362
self.assertEqual(1, len(builder._backing_indices))
363
self.assertEqual(2, builder._backing_indices[0].key_count())
365
builder.add_node(*nodes[2])
366
self.assertEqual(1, len(builder._nodes))
367
self.assertIs(None, builder._nodes_by_key)
368
# And spills to a second backing index combing all
369
builder.add_node(*nodes[3])
370
self.assertEqual(0, len(builder._nodes))
371
self.assertIs(None, builder._nodes_by_key)
372
self.assertEqual(2, len(builder._backing_indices))
373
self.assertEqual(None, builder._backing_indices[0])
374
self.assertEqual(4, builder._backing_indices[1].key_count())
375
# The next spills to the 2-len slot
376
builder.add_node(*nodes[4])
377
builder.add_node(*nodes[5])
378
self.assertEqual(0, len(builder._nodes))
379
self.assertIs(None, builder._nodes_by_key)
380
self.assertEqual(2, len(builder._backing_indices))
381
self.assertEqual(2, builder._backing_indices[0].key_count())
382
self.assertEqual(4, builder._backing_indices[1].key_count())
383
# Next spill combines
384
builder.add_node(*nodes[6])
385
builder.add_node(*nodes[7])
386
self.assertEqual(3, len(builder._backing_indices))
387
self.assertEqual(None, builder._backing_indices[0])
388
self.assertEqual(None, builder._backing_indices[1])
389
self.assertEqual(8, builder._backing_indices[2].key_count())
390
# And so forth - counting up in binary.
391
builder.add_node(*nodes[8])
392
builder.add_node(*nodes[9])
393
self.assertEqual(3, len(builder._backing_indices))
394
self.assertEqual(2, builder._backing_indices[0].key_count())
395
self.assertEqual(None, builder._backing_indices[1])
396
self.assertEqual(8, builder._backing_indices[2].key_count())
397
builder.add_node(*nodes[10])
398
builder.add_node(*nodes[11])
399
self.assertEqual(3, len(builder._backing_indices))
400
self.assertEqual(None, builder._backing_indices[0])
401
self.assertEqual(4, builder._backing_indices[1].key_count())
402
self.assertEqual(8, builder._backing_indices[2].key_count())
403
builder.add_node(*nodes[12])
404
# Test that memory and disk are both used for query methods; and that
405
# None is skipped over happily.
406
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
407
list(builder.iter_all_entries()))
408
# Two nodes - one memory one disk
409
self.assertEqual({(builder,) + node for node in nodes[11:13]},
410
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
411
self.assertEqual(13, builder.key_count())
412
self.assertEqual({(builder,) + node for node in nodes[11:13]},
413
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
414
builder.add_node(*nodes[13])
415
self.assertEqual(3, len(builder._backing_indices))
416
self.assertEqual(2, builder._backing_indices[0].key_count())
417
self.assertEqual(4, builder._backing_indices[1].key_count())
418
self.assertEqual(8, builder._backing_indices[2].key_count())
419
builder.add_node(*nodes[14])
420
builder.add_node(*nodes[15])
421
self.assertEqual(4, len(builder._backing_indices))
422
self.assertEqual(None, builder._backing_indices[0])
423
self.assertEqual(None, builder._backing_indices[1])
424
self.assertEqual(None, builder._backing_indices[2])
425
self.assertEqual(16, builder._backing_indices[3].key_count())
426
# Now finish, and check we got a correctly ordered tree
427
t = self.get_transport('')
428
size = t.put_file('index', builder.finish())
429
index = btree_index.BTreeGraphIndex(t, 'index', size)
430
nodes = list(index.iter_all_entries())
431
self.assertEqual(sorted(nodes), nodes)
432
self.assertEqual(16, len(nodes))
434
def test_spill_index_stress_1_1_no_combine(self):
435
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
436
builder.set_optimize(for_size=False, combine_backing_indices=False)
437
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
438
builder.add_node(*nodes[0])
439
# Test the parts of the index that take up memory are doing so
441
self.assertEqual(1, len(builder._nodes))
442
self.assertIs(None, builder._nodes_by_key)
443
builder.add_node(*nodes[1])
444
self.assertEqual(0, len(builder._nodes))
445
self.assertIs(None, builder._nodes_by_key)
446
self.assertEqual(1, len(builder._backing_indices))
447
self.assertEqual(2, builder._backing_indices[0].key_count())
449
builder.add_node(*nodes[2])
450
self.assertEqual(1, len(builder._nodes))
451
self.assertIs(None, builder._nodes_by_key)
452
# And spills to a second backing index but doesn't combine
453
builder.add_node(*nodes[3])
454
self.assertEqual(0, len(builder._nodes))
455
self.assertIs(None, builder._nodes_by_key)
456
self.assertEqual(2, len(builder._backing_indices))
457
for backing_index in builder._backing_indices:
458
self.assertEqual(2, backing_index.key_count())
459
# The next spills to the 3rd slot
460
builder.add_node(*nodes[4])
461
builder.add_node(*nodes[5])
462
self.assertEqual(0, len(builder._nodes))
463
self.assertIs(None, builder._nodes_by_key)
464
self.assertEqual(3, len(builder._backing_indices))
465
for backing_index in builder._backing_indices:
466
self.assertEqual(2, backing_index.key_count())
467
# Now spill a few more, and check that we don't combine
468
builder.add_node(*nodes[6])
469
builder.add_node(*nodes[7])
470
builder.add_node(*nodes[8])
471
builder.add_node(*nodes[9])
472
builder.add_node(*nodes[10])
473
builder.add_node(*nodes[11])
474
builder.add_node(*nodes[12])
475
self.assertEqual(6, len(builder._backing_indices))
476
for backing_index in builder._backing_indices:
477
self.assertEqual(2, backing_index.key_count())
478
# Test that memory and disk are both used for query methods; and that
479
# None is skipped over happily.
480
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
481
list(builder.iter_all_entries()))
482
# Two nodes - one memory one disk
483
self.assertEqual({(builder,) + node for node in nodes[11:13]},
484
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
485
self.assertEqual(13, builder.key_count())
486
self.assertEqual({(builder,) + node for node in nodes[11:13]},
487
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
488
builder.add_node(*nodes[13])
489
builder.add_node(*nodes[14])
490
builder.add_node(*nodes[15])
491
self.assertEqual(8, len(builder._backing_indices))
492
for backing_index in builder._backing_indices:
493
self.assertEqual(2, backing_index.key_count())
494
# Now finish, and check we got a correctly ordered tree
495
transport = self.get_transport('')
496
size = transport.put_file('index', builder.finish())
497
index = btree_index.BTreeGraphIndex(transport, 'index', size)
498
nodes = list(index.iter_all_entries())
499
self.assertEqual(sorted(nodes), nodes)
500
self.assertEqual(16, len(nodes))
502
def test_set_optimize(self):
503
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
504
builder.set_optimize(for_size=True)
505
self.assertTrue(builder._optimize_for_size)
506
builder.set_optimize(for_size=False)
507
self.assertFalse(builder._optimize_for_size)
508
# test that we can set combine_backing_indices without effecting
511
builder._optimize_for_size = obj
512
builder.set_optimize(combine_backing_indices=False)
513
self.assertFalse(builder._combine_backing_indices)
514
self.assertIs(obj, builder._optimize_for_size)
515
builder.set_optimize(combine_backing_indices=True)
516
self.assertTrue(builder._combine_backing_indices)
517
self.assertIs(obj, builder._optimize_for_size)
519
def test_spill_index_stress_2_2(self):
520
# test that references and longer keys don't confuse things.
521
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
523
nodes = self.make_nodes(16, 2, 2)
524
builder.add_node(*nodes[0])
525
# Test the parts of the index that take up memory are doing so
527
self.assertEqual(1, len(builder._nodes))
528
self.assertIs(None, builder._nodes_by_key)
529
builder.add_node(*nodes[1])
530
self.assertEqual(0, len(builder._nodes))
531
self.assertIs(None, builder._nodes_by_key)
532
self.assertEqual(1, len(builder._backing_indices))
533
self.assertEqual(2, builder._backing_indices[0].key_count())
535
old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
536
builder.add_node(*nodes[2])
537
self.assertEqual(1, len(builder._nodes))
538
self.assertIsNot(None, builder._nodes_by_key)
539
self.assertNotEqual({}, builder._nodes_by_key)
540
# We should have a new entry
541
self.assertNotEqual(old, builder._nodes_by_key)
542
# And spills to a second backing index combing all
543
builder.add_node(*nodes[3])
544
self.assertEqual(0, len(builder._nodes))
545
self.assertIs(None, builder._nodes_by_key)
546
self.assertEqual(2, len(builder._backing_indices))
547
self.assertEqual(None, builder._backing_indices[0])
548
self.assertEqual(4, builder._backing_indices[1].key_count())
549
# The next spills to the 2-len slot
550
builder.add_node(*nodes[4])
551
builder.add_node(*nodes[5])
552
self.assertEqual(0, len(builder._nodes))
553
self.assertIs(None, builder._nodes_by_key)
554
self.assertEqual(2, len(builder._backing_indices))
555
self.assertEqual(2, builder._backing_indices[0].key_count())
556
self.assertEqual(4, builder._backing_indices[1].key_count())
557
# Next spill combines
558
builder.add_node(*nodes[6])
559
builder.add_node(*nodes[7])
560
self.assertEqual(3, len(builder._backing_indices))
561
self.assertEqual(None, builder._backing_indices[0])
562
self.assertEqual(None, builder._backing_indices[1])
563
self.assertEqual(8, builder._backing_indices[2].key_count())
564
# And so forth - counting up in binary.
565
builder.add_node(*nodes[8])
566
builder.add_node(*nodes[9])
567
self.assertEqual(3, len(builder._backing_indices))
568
self.assertEqual(2, builder._backing_indices[0].key_count())
569
self.assertEqual(None, builder._backing_indices[1])
570
self.assertEqual(8, builder._backing_indices[2].key_count())
571
builder.add_node(*nodes[10])
572
builder.add_node(*nodes[11])
573
self.assertEqual(3, len(builder._backing_indices))
574
self.assertEqual(None, builder._backing_indices[0])
575
self.assertEqual(4, builder._backing_indices[1].key_count())
576
self.assertEqual(8, builder._backing_indices[2].key_count())
577
builder.add_node(*nodes[12])
578
# Test that memory and disk are both used for query methods; and that
579
# None is skipped over happily.
580
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
581
list(builder.iter_all_entries()))
582
# Two nodes - one memory one disk
583
self.assertEqual({(builder,) + node for node in nodes[11:13]},
584
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
585
self.assertEqual(13, builder.key_count())
586
self.assertEqual({(builder,) + node for node in nodes[11:13]},
587
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
588
builder.add_node(*nodes[13])
589
self.assertEqual(3, len(builder._backing_indices))
590
self.assertEqual(2, builder._backing_indices[0].key_count())
591
self.assertEqual(4, builder._backing_indices[1].key_count())
592
self.assertEqual(8, builder._backing_indices[2].key_count())
593
builder.add_node(*nodes[14])
594
builder.add_node(*nodes[15])
595
self.assertEqual(4, len(builder._backing_indices))
596
self.assertEqual(None, builder._backing_indices[0])
597
self.assertEqual(None, builder._backing_indices[1])
598
self.assertEqual(None, builder._backing_indices[2])
599
self.assertEqual(16, builder._backing_indices[3].key_count())
600
# Now finish, and check we got a correctly ordered tree
601
transport = self.get_transport('')
602
size = transport.put_file('index', builder.finish())
603
index = btree_index.BTreeGraphIndex(transport, 'index', size)
604
nodes = list(index.iter_all_entries())
605
self.assertEqual(sorted(nodes), nodes)
606
self.assertEqual(16, len(nodes))
608
def test_spill_index_duplicate_key_caught_on_finish(self):
    """A duplicate key hidden in a spilled backing index is caught at finish().

    With spill_at=2 the first two nodes are flushed to a backing index, so
    re-adding nodes[0] is not detected immediately; the duplicate only
    surfaces when finish() merges the in-memory and backing indices.
    """
    builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
    nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
    builder.add_node(*nodes[0])
    builder.add_node(*nodes[1])
    builder.add_node(*nodes[0])
    self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
617
class TestBTreeIndex(BTreeTestCase):
619
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
    """Build a btree index from nodes and reopen it over a trace transport.

    :param ref_lists: Number of reference lists for the index.
    :param key_elements: Number of elements in each key tuple.
    :param nodes: Iterable of (key, value, references) tuples to add.
        (Only iterated, never mutated, so the mutable default is safe.)
    :return: A BTreeGraphIndex reading the serialised index via a
        'trace+' transport, so tests can assert on transport activity.
    """
    builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
                                       key_elements=key_elements)
    for key, value, references in nodes:
        builder.add_node(key, value, references)
    stream = builder.finish()
    trans = transport.get_transport_from_url('trace+' + self.get_url())
    size = trans.put_file('index', stream)
    return btree_index.BTreeGraphIndex(trans, 'index', size)
629
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
631
builder = btree_index.BTreeBuilder(key_elements=key_elements,
632
reference_lists=ref_lists)
633
builder.add_nodes(nodes)
634
transport = self.get_transport('')
635
# NamedTemporaryFile dies on builder.finish().read(). weird.
636
temp_file = builder.finish()
637
content = temp_file.read()
640
transport.put_bytes('index', (b' '*offset)+content)
641
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
644
def test_clear_cache(self):
645
nodes = self.make_nodes(160, 2, 2)
646
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
647
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
648
self.assertEqual([1, 4], index._row_lengths)
649
self.assertIsNot(None, index._root_node)
650
internal_node_pre_clear = set(index._internal_node_cache)
651
self.assertTrue(len(index._leaf_node_cache) > 0)
653
# We don't touch _root_node or _internal_node_cache, both should be
654
# small, and can save a round trip or two
655
self.assertIsNot(None, index._root_node)
656
# NOTE: We don't want to affect the _internal_node_cache, as we expect
657
# it will be small, and if we ever do touch this index again, it
658
# will save round-trips. This assertion isn't very strong,
659
# becuase without a 3-level index, we don't have any internal
661
self.assertEqual(internal_node_pre_clear,
662
set(index._internal_node_cache))
663
self.assertEqual(0, len(index._leaf_node_cache))
665
def test_trivial_constructor(self):
    """Constructing an index with size=None does no transport I/O."""
    t = transport.get_transport_from_url('trace+' + self.get_url(''))
    index = btree_index.BTreeGraphIndex(t, 'index', None)
    # Checks the page size at load, but that isn't logged yet.
    self.assertEqual([], t._activity)
671
def test_with_size_constructor(self):
    """Constructing an index with an explicit size does no transport I/O."""
    t = transport.get_transport_from_url('trace+' + self.get_url(''))
    index = btree_index.BTreeGraphIndex(t, 'index', 1)
    # Checks the page size at load, but that isn't logged yet.
    self.assertEqual([], t._activity)
677
def test_empty_key_count_no_size(self):
678
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
679
t = transport.get_transport_from_url('trace+' + self.get_url(''))
680
t.put_file('index', builder.finish())
681
index = btree_index.BTreeGraphIndex(t, 'index', None)
683
self.assertEqual([], t._activity)
684
self.assertEqual(0, index.key_count())
685
# The entire index should have been requested (as we generally have the
686
# size available, and doing many small readvs is inappropriate).
687
# We can't tell how much was actually read here, but - check the code.
688
self.assertEqual([('get', 'index')], t._activity)
690
def test_empty_key_count(self):
691
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
692
t = transport.get_transport_from_url('trace+' + self.get_url(''))
693
size = t.put_file('index', builder.finish())
694
self.assertEqual(72, size)
695
index = btree_index.BTreeGraphIndex(t, 'index', size)
697
self.assertEqual([], t._activity)
698
self.assertEqual(0, index.key_count())
699
# The entire index should have been read, as 4K > size
700
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
703
def test_non_empty_key_count_2_2(self):
704
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
705
nodes = self.make_nodes(35, 2, 2)
707
builder.add_node(*node)
708
t = transport.get_transport_from_url('trace+' + self.get_url(''))
709
size = t.put_file('index', builder.finish())
710
index = btree_index.BTreeGraphIndex(t, 'index', size)
712
self.assertEqual([], t._activity)
713
self.assertEqual(70, index.key_count())
714
# The entire index should have been read, as it is one page long.
715
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
717
self.assertEqualApproxCompressed(1173, size)
719
def test_with_offset_no_size(self):
720
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
722
nodes=self.make_nodes(200, 1, 1))
723
index._size = None # throw away the size info
724
self.assertEqual(200, index.key_count())
726
def test_with_small_offset(self):
727
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
729
nodes=self.make_nodes(200, 1, 1))
730
self.assertEqual(200, index.key_count())
732
def test_with_large_offset(self):
733
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
735
nodes=self.make_nodes(200, 1, 1))
736
self.assertEqual(200, index.key_count())
738
def test__read_nodes_no_size_one_page_reads_once(self):
739
self.make_index(nodes=[((b'key',), b'value', ())])
740
trans = transport.get_transport_from_url('trace+' + self.get_url())
741
index = btree_index.BTreeGraphIndex(trans, 'index', None)
742
del trans._activity[:]
743
nodes = dict(index._read_nodes([0]))
744
self.assertEqual({0}, set(nodes))
746
self.assertEqual([(b'key',)], node.all_keys())
747
self.assertEqual([('get', 'index')], trans._activity)
749
def test__read_nodes_no_size_multiple_pages(self):
750
index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
752
num_pages = index._row_offsets[-1]
753
# Reopen with a traced transport and no size
754
trans = transport.get_transport_from_url('trace+' + self.get_url())
755
index = btree_index.BTreeGraphIndex(trans, 'index', None)
756
del trans._activity[:]
757
nodes = dict(index._read_nodes([0]))
758
self.assertEqual(list(range(num_pages)), sorted(nodes))
760
def test_2_levels_key_count_2_2(self):
761
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
762
nodes = self.make_nodes(160, 2, 2)
764
builder.add_node(*node)
765
t = transport.get_transport_from_url('trace+' + self.get_url(''))
766
size = t.put_file('index', builder.finish())
767
self.assertEqualApproxCompressed(17692, size)
768
index = btree_index.BTreeGraphIndex(t, 'index', size)
770
self.assertEqual([], t._activity)
771
self.assertEqual(320, index.key_count())
772
# The entire index should not have been read.
773
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
776
def test_validate_one_page(self):
777
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
778
nodes = self.make_nodes(45, 2, 2)
780
builder.add_node(*node)
781
t = transport.get_transport_from_url('trace+' + self.get_url(''))
782
size = t.put_file('index', builder.finish())
783
index = btree_index.BTreeGraphIndex(t, 'index', size)
785
self.assertEqual([], t._activity)
787
# The entire index should have been read linearly.
788
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
790
self.assertEqualApproxCompressed(1488, size)
792
def test_validate_two_pages(self):
793
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
794
nodes = self.make_nodes(80, 2, 2)
796
builder.add_node(*node)
797
t = transport.get_transport_from_url('trace+' + self.get_url(''))
798
size = t.put_file('index', builder.finish())
799
# Root page, 2 leaf pages
800
self.assertEqualApproxCompressed(9339, size)
801
index = btree_index.BTreeGraphIndex(t, 'index', size)
803
self.assertEqual([], t._activity)
805
rem = size - 8192 # Number of remaining bytes after second block
806
# The entire index should have been read linearly.
808
[('readv', 'index', [(0, 4096)], False, None),
809
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
811
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
812
# node and make validate find them.
814
def test_eq_ne(self):
815
# two indices are equal when constructed with the same parameters:
816
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
817
t2 = self.get_transport()
819
btree_index.BTreeGraphIndex(t1, 'index', None) ==
820
btree_index.BTreeGraphIndex(t1, 'index', None))
822
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
823
btree_index.BTreeGraphIndex(t1, 'index', 20))
825
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
826
btree_index.BTreeGraphIndex(t2, 'index', 20))
828
btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
829
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
831
btree_index.BTreeGraphIndex(t1, 'index', 10) ==
832
btree_index.BTreeGraphIndex(t1, 'index', 20))
834
btree_index.BTreeGraphIndex(t1, 'index', None) !=
835
btree_index.BTreeGraphIndex(t1, 'index', None))
837
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
838
btree_index.BTreeGraphIndex(t1, 'index', 20))
840
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
841
btree_index.BTreeGraphIndex(t2, 'index', 20))
843
btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
844
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
846
btree_index.BTreeGraphIndex(t1, 'index', 10) !=
847
btree_index.BTreeGraphIndex(t1, 'index', 20))
849
def test_key_too_big(self):
850
# the size that matters here is the _compressed_ size of the key, so we can't
851
# do a simple character repeat.
852
bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE))
853
self.assertRaises(errors.BadIndexKey,
855
nodes=[((bigKey,), b'value', ())])
857
def test_iter_all_only_root_no_size(self):
858
self.make_index(nodes=[((b'key',), b'value', ())])
859
t = transport.get_transport_from_url('trace+' + self.get_url(''))
860
index = btree_index.BTreeGraphIndex(t, 'index', None)
862
self.assertEqual([((b'key',), b'value')],
863
[x[1:] for x in index.iter_all_entries()])
864
self.assertEqual([('get', 'index')], t._activity)
866
def test_iter_all_entries_reads(self):
867
# iterating all entries reads the header, then does a linear
869
self.shrink_page_size()
870
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
871
# 20k nodes is enough to create a two internal nodes on the second
872
# level, with a 2K page size
873
nodes = self.make_nodes(10000, 2, 2)
875
builder.add_node(*node)
876
t = transport.get_transport_from_url('trace+' + self.get_url(''))
877
size = t.put_file('index', builder.finish())
878
page_size = btree_index._PAGE_SIZE
880
index = btree_index.BTreeGraphIndex(t, 'index', size)
882
self.assertEqual([], t._activity)
883
found_nodes = self.time(list, index.iter_all_entries())
885
for node in found_nodes:
886
self.assertTrue(node[0] is index)
887
bare_nodes.append(node[1:])
888
self.assertEqual(3, len(index._row_lengths),
889
"Not enough rows: %r" % index._row_lengths)
890
# Should be as long as the nodes we supplied
891
self.assertEqual(20000, len(found_nodes))
892
# Should have the same content
893
self.assertEqual(set(nodes), set(bare_nodes))
894
# Should have done linear scan IO up the index, ignoring
895
# the internal nodes:
896
# The entire index should have been read
897
total_pages = sum(index._row_lengths)
898
self.assertEqual(total_pages, index._row_offsets[-1])
899
self.assertEqualApproxCompressed(1303220, size)
900
# The start of the leaves
901
first_byte = index._row_offsets[-2] * page_size
903
for offset in range(first_byte, size, page_size):
904
readv_request.append((offset, page_size))
905
# The last page is truncated
906
readv_request[-1] = (readv_request[-1][0], size % page_size)
907
expected = [('readv', 'index', [(0, page_size)], False, None),
908
('readv', 'index', readv_request, False, None)]
909
if expected != t._activity:
910
self.assertEqualDiff(pprint.pformat(expected),
911
pprint.pformat(t._activity))
913
def test_iter_entries_references_2_refs_resolved(self):
914
# iterating some entries reads just the pages needed. For now, to
915
# get it working and start measuring, only 4K pages are read.
916
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
917
# 80 nodes is enough to create a two-level index.
918
nodes = self.make_nodes(160, 2, 2)
920
builder.add_node(*node)
921
t = transport.get_transport_from_url('trace+' + self.get_url(''))
922
size = t.put_file('index', builder.finish())
924
index = btree_index.BTreeGraphIndex(t, 'index', size)
926
self.assertEqual([], t._activity)
928
found_nodes = list(index.iter_entries([nodes[30][0]]))
930
for node in found_nodes:
931
self.assertTrue(node[0] is index)
932
bare_nodes.append(node[1:])
933
# Should be as long as the nodes we supplied
934
self.assertEqual(1, len(found_nodes))
935
# Should have the same content
936
self.assertEqual(nodes[30], bare_nodes[0])
937
# Should have read the root node, then one leaf page:
938
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
939
('readv', 'index', [(8192, 4096), ], False, None)],
942
def test_iter_key_prefix_1_element_key_None(self):
943
index = self.make_index()
944
self.assertRaises(errors.BadIndexKey, list,
945
index.iter_entries_prefix([(None, )]))
947
def test_iter_key_prefix_wrong_length(self):
948
index = self.make_index()
949
self.assertRaises(errors.BadIndexKey, list,
950
index.iter_entries_prefix([(b'foo', None)]))
951
index = self.make_index(key_elements=2)
952
self.assertRaises(errors.BadIndexKey, list,
953
index.iter_entries_prefix([(b'foo', )]))
954
self.assertRaises(errors.BadIndexKey, list,
955
index.iter_entries_prefix([(b'foo', None, None)]))
957
def test_iter_key_prefix_1_key_element_no_refs(self):
958
index = self.make_index(nodes=[
959
((b'name', ), b'data', ()),
960
((b'ref', ), b'refdata', ())])
961
self.assertEqual({(index, (b'name', ), b'data'),
962
(index, (b'ref', ), b'refdata')},
963
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
965
def test_iter_key_prefix_1_key_element_refs(self):
966
index = self.make_index(1, nodes=[
967
((b'name', ), b'data', ([(b'ref', )], )),
968
((b'ref', ), b'refdata', ([], ))])
969
self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)),
970
(index, (b'ref', ), b'refdata', ((), ))},
971
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
973
def test_iter_key_prefix_2_key_element_no_refs(self):
974
index = self.make_index(key_elements=2, nodes=[
975
((b'name', b'fin1'), b'data', ()),
976
((b'name', b'fin2'), b'beta', ()),
977
((b'ref', b'erence'), b'refdata', ())])
978
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
979
(index, (b'ref', b'erence'), b'refdata')},
980
set(index.iter_entries_prefix(
981
[(b'name', b'fin1'), (b'ref', b'erence')])))
982
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
983
(index, (b'name', b'fin2'), b'beta')},
984
set(index.iter_entries_prefix([(b'name', None)])))
986
def test_iter_key_prefix_2_key_element_refs(self):
987
index = self.make_index(1, key_elements=2, nodes=[
988
((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )),
989
((b'name', b'fin2'), b'beta', ([], )),
990
((b'ref', b'erence'), b'refdata', ([], ))])
992
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
993
(index, (b'ref', b'erence'), b'refdata', ((), ))},
994
set(index.iter_entries_prefix(
995
[(b'name', b'fin1'), (b'ref', b'erence')])))
997
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
998
(index, (b'name', b'fin2'), b'beta', ((), ))},
999
set(index.iter_entries_prefix([(b'name', None)])))
1001
# XXX: external_references tests are duplicated in test_index. We
1002
# probably should have per_graph_index tests...
1003
def test_external_references_no_refs(self):
1004
index = self.make_index(ref_lists=0, nodes=[])
1005
self.assertRaises(ValueError, index.external_references, 0)
1007
def test_external_references_no_results(self):
1008
index = self.make_index(ref_lists=1, nodes=[
1009
((b'key',), b'value', ([],))])
1010
self.assertEqual(set(), index.external_references(0))
1012
def test_external_references_missing_ref(self):
1013
missing_key = (b'missing',)
1014
index = self.make_index(ref_lists=1, nodes=[
1015
((b'key',), b'value', ([missing_key],))])
1016
self.assertEqual({missing_key}, index.external_references(0))
1018
def test_external_references_multiple_ref_lists(self):
1019
missing_key = (b'missing',)
1020
index = self.make_index(ref_lists=2, nodes=[
1021
((b'key',), b'value', ([], [missing_key]))])
1022
self.assertEqual(set([]), index.external_references(0))
1023
self.assertEqual({missing_key}, index.external_references(1))
1025
def test_external_references_two_records(self):
1026
index = self.make_index(ref_lists=1, nodes=[
1027
((b'key-1',), b'value', ([(b'key-2',)],)),
1028
((b'key-2',), b'value', ([],)),
1030
self.assertEqual(set([]), index.external_references(0))
1032
def test__find_ancestors_one_page(self):
1035
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1036
(key1, b'value', ([key2],)),
1037
(key2, b'value', ([],)),
1040
missing_keys = set()
1041
search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1042
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1043
self.assertEqual(set(), missing_keys)
1044
self.assertEqual(set(), search_keys)
1046
def test__find_ancestors_one_page_w_missing(self):
1050
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
(key1, b'value', ([key2],)),
1052
(key2, b'value', ([],)),
1055
missing_keys = set()
1056
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1058
self.assertEqual({key2: ()}, parent_map)
1059
# we know that key3 is missing because we read the page that it would
1061
self.assertEqual({key3}, missing_keys)
1062
self.assertEqual(set(), search_keys)
1064
def test__find_ancestors_one_parent_missing(self):
1068
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1069
(key1, b'value', ([key2],)),
1070
(key2, b'value', ([key3],)),
1073
missing_keys = set()
1074
search_keys = index._find_ancestors([key1], 0, parent_map,
1076
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
self.assertEqual(set(), missing_keys)
1078
# all we know is that key3 wasn't present on the page we were reading
1079
# but if you look, the last key is key2 which comes before key3, so we
1080
# don't know whether key3 would land on this page or not.
1081
self.assertEqual({key3}, search_keys)
1082
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1084
# passing it back in, we are sure it is 'missing'
1085
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
self.assertEqual({key3}, missing_keys)
1087
self.assertEqual(set([]), search_keys)
1089
def test__find_ancestors_dont_search_known(self):
1093
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1094
(key1, b'value', ([key2],)),
1095
(key2, b'value', ([key3],)),
1096
(key3, b'value', ([],)),
1098
# We already know about key2, so we won't try to search for key3
1099
parent_map = {key2: (key3,)}
1100
missing_keys = set()
1101
search_keys = index._find_ancestors([key1], 0, parent_map,
1103
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1104
self.assertEqual(set(), missing_keys)
1105
self.assertEqual(set(), search_keys)
1107
def test__find_ancestors_multiple_pages(self):
1108
# We need to use enough keys that we actually cause a split
1109
start_time = 1249671539
1110
email = "joebob@example.com"
1114
for i in range(400):
1115
rev_id = ('%s-%s-%s' % (email,
1116
osutils.compact_date(start_time + i),
1117
osutils.rand_chars(16))).encode('ascii')
1119
nodes.append((rev_key, b'value', ref_lists))
1120
# We have a ref 'list' of length 1, with a list of parents, with 1
1121
# parent which is a key
1122
ref_lists = ((rev_key,),)
1123
rev_keys.append(rev_key)
1124
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1125
self.assertEqual(400, index.key_count())
1126
self.assertEqual(3, len(index._row_offsets))
1127
nodes = dict(index._read_nodes([1, 2]))
1130
min_l2_key = l2.min_key
1131
max_l1_key = l1.max_key
1132
self.assertTrue(max_l1_key < min_l2_key)
1133
parents_min_l2_key = l2[min_l2_key][1][0]
1134
self.assertEqual((l1.max_key,), parents_min_l2_key)
1135
# Now, whatever key we select that would fall on the second page,
1136
# should give us all the parents until the page break
1137
key_idx = rev_keys.index(min_l2_key)
1138
next_key = rev_keys[key_idx+1]
1139
# So now when we get the parent map, we should get the key we are
1140
# looking for, min_l2_key, and then a reference to go look for the
1141
# parent of that key
1143
missing_keys = set()
1144
search_keys = index._find_ancestors([next_key], 0, parent_map,
1146
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1147
self.assertEqual(set(), missing_keys)
1148
self.assertEqual({max_l1_key}, search_keys)
1150
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1152
self.assertEqual(l1.all_keys(), sorted(parent_map))
1153
self.assertEqual(set(), missing_keys)
1154
self.assertEqual(set(), search_keys)
1156
def test__find_ancestors_empty_index(self):
1157
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1159
missing_keys = set()
1160
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1162
self.assertEqual(set(), search_keys)
1163
self.assertEqual({}, parent_map)
1164
self.assertEqual({('one',), ('two',)}, missing_keys)
1166
def test_supports_unlimited_cache(self):
1167
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1168
# We need enough nodes to cause a page split (so we have both an
1169
# internal node and a couple leaf nodes. 500 seems to be enough.)
1170
nodes = self.make_nodes(500, 1, 0)
1172
builder.add_node(*node)
1173
stream = builder.finish()
1174
trans = self.get_transport()
1175
size = trans.put_file('index', stream)
1176
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1177
self.assertEqual(500, index.key_count())
1178
# We have an internal node
1179
self.assertEqual(2, len(index._row_lengths))
1180
# We have at least 2 leaf nodes
1181
self.assertTrue(index._row_lengths[-1] >= 2)
1182
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1183
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1184
index._leaf_node_cache._max_cache)
1185
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1186
self.assertEqual(100, index._internal_node_cache._max_cache)
1187
# No change if unlimited_cache=False is passed
1188
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1189
unlimited_cache=False)
1190
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1191
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1192
index._leaf_node_cache._max_cache)
1193
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1194
self.assertEqual(100, index._internal_node_cache._max_cache)
1195
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1196
unlimited_cache=True)
1197
self.assertIsInstance(index._leaf_node_cache, dict)
1198
self.assertIs(type(index._internal_node_cache), dict)
1199
# Exercise the lookup code
1200
entries = set(index.iter_entries([n[0] for n in nodes]))
1201
self.assertEqual(500, len(entries))
1204
class TestBTreeNodes(BTreeTestCase):
1206
scenarios = btreeparser_scenarios()
1209
super(TestBTreeNodes, self).setUp()
1210
self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
1212
def test_LeafNode_1_0(self):
1213
node_bytes = (b"type=leaf\n"
1214
b"0000000000000000000000000000000000000000\x00\x00value:0\n"
1215
b"1111111111111111111111111111111111111111\x00\x00value:1\n"
1216
b"2222222222222222222222222222222222222222\x00\x00value:2\n"
1217
b"3333333333333333333333333333333333333333\x00\x00value:3\n"
1218
b"4444444444444444444444444444444444444444\x00\x00value:4\n")
1219
node = btree_index._LeafNode(node_bytes, 1, 0)
1220
# We do direct access, or don't care about order, to leaf nodes most of
1221
# the time, so a dict is useful:
1223
(b"0000000000000000000000000000000000000000",): (b"value:0", ()),
1224
(b"1111111111111111111111111111111111111111",): (b"value:1", ()),
1225
(b"2222222222222222222222222222222222222222",): (b"value:2", ()),
1226
(b"3333333333333333333333333333333333333333",): (b"value:3", ()),
1227
(b"4444444444444444444444444444444444444444",): (b"value:4", ()),
1228
}, dict(node.all_items()))
1230
def test_LeafNode_2_2(self):
1231
node_bytes = (b"type=leaf\n"
1232
b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1233
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1234
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1235
b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1238
node = btree_index._LeafNode(node_bytes, 2, 2)
1239
# We do direct access, or don't care about order, to leaf nodes most of
1240
# the time, so a dict is useful:
1242
(b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1243
(b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1244
((b'00', b'ref00'), (b'01', b'ref01')))),
1245
(b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1246
((b'11', b'ref22'), (b'11', b'ref22')))),
1247
(b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1248
}, dict(node.all_items()))
1250
def test_InternalNode_1(self):
1251
node_bytes = (b"type=internal\n"
1253
b"0000000000000000000000000000000000000000\n"
1254
b"1111111111111111111111111111111111111111\n"
1255
b"2222222222222222222222222222222222222222\n"
1256
b"3333333333333333333333333333333333333333\n"
1257
b"4444444444444444444444444444444444444444\n"
1259
node = btree_index._InternalNode(node_bytes)
1260
# We want to bisect to find the right children from this node, so a
1261
# vector is most useful.
1263
(b"0000000000000000000000000000000000000000",),
1264
(b"1111111111111111111111111111111111111111",),
1265
(b"2222222222222222222222222222222222222222",),
1266
(b"3333333333333333333333333333333333333333",),
1267
(b"4444444444444444444444444444444444444444",),
1269
self.assertEqual(1, node.offset)
1271
def test_LeafNode_2_2(self):
1272
node_bytes = (b"type=leaf\n"
1273
b"00\x0000\x00\t00\x00ref00\x00value:0\n"
1274
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1275
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1276
b"11\x0044\x00\t11\x00ref00\x00value:4\n"
1279
node = btree_index._LeafNode(node_bytes, 2, 2)
1280
# We do direct access, or don't care about order, to leaf nodes most of
1281
# the time, so a dict is useful:
1283
(b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
1284
(b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
1285
((b'00', b'ref00'), (b'01', b'ref01')))),
1286
(b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
1287
((b'11', b'ref22'), (b'11', b'ref22')))),
1288
(b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
1289
}, dict(node.all_items()))
1291
def assertFlattened(self, expected, key, value, refs):
1292
flat_key, flat_line = self.parse_btree._flatten_node(
1293
(None, key, value, refs), bool(refs))
1294
self.assertEqual(b'\x00'.join(key), flat_key)
1295
self.assertEqual(expected, flat_line)
1297
def test__flatten_node(self):
1298
self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', [])
1299
self.assertFlattened(b'key\0tuple\0\0value str\n',
1300
(b'key', b'tuple'), b'value str', [])
1301
self.assertFlattened(b'key\0tuple\0triple\0\0value str\n',
1302
(b'key', b'tuple', b'triple'), b'value str', [])
1303
self.assertFlattened(b'k\0t\0s\0ref\0value str\n',
1304
(b'k', b't', b's'), b'value str', [[(b'ref',)]])
1305
self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n',
1306
(b'key', b'tuple'), b'value str', [[(b'ref', b'key')]])
1307
self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n",
1308
(b'00', b'00'), b'value:0', ((), ((b'00', b'ref00'),)))
1309
self.assertFlattened(
1310
b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1311
(b'00', b'11'), b'value:1',
1312
(((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01'))))
1313
self.assertFlattened(
1314
b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1315
(b'11', b'33'), b'value:3',
1316
(((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22'))))
1317
self.assertFlattened(
1318
b"11\x0044\x00\t11\x00ref00\x00value:4\n",
1319
(b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),)))
1322
class TestCompiledBtree(tests.TestCase):
    """Smoke test that the compiled btree parser extension is present."""

    def test_exists(self):
        # This is just to let the user know if they don't have the feature
        # available
        self.requireFeature(compiled_btreeparser_feature)
class TestMultiBisectRight(tests.TestCase):
    """Tests for BTreeGraphIndex._multi_bisect_right.

    _multi_bisect_right groups sorted search keys by the insertion point
    each one would take in the sorted fixed_keys list, returning
    [(offset, [keys at that offset]), ...].
    """

    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
        """Assert _multi_bisect_right maps search_keys to offsets."""
        self.assertEqual(offsets,
                         btree_index.BTreeGraphIndex._multi_bisect_right(
                             search_keys, fixed_keys))

    def test_after(self):
        # Keys sorting after every fixed key land past the end.
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])

    def test_before(self):
        # Keys sorting before every fixed key land at offset 0.
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])

    def test_exact(self):
        # bisect_right semantics: an exact match lands after the fixed key.
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
                                    ['a', 'c'], ['a', 'b', 'c'])

    def test_inbetween(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])

    def test_mixed(self):
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
                                     (4, ['g', 'h'])],
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
                                    ['c', 'd', 'f', 'g'])
class TestExpandOffsets(tests.TestCase):
1367
def make_index(self, size, recommended_pages=None):
1368
"""Make an index with a generic size.
1370
This doesn't actually create anything on disk, it just primes a
1371
BTreeGraphIndex with the recommended information.
1373
index = btree_index.BTreeGraphIndex(
1374
transport.get_transport_from_url('memory:///'),
1375
'test-index', size=size)
1376
if recommended_pages is not None:
1377
index._recommended_pages = recommended_pages
1380
def set_cached_offsets(self, index, cached_offsets):
1381
"""Monkeypatch to give a canned answer for _get_offsets_for...()."""
1382
def _get_offsets_to_cached_pages():
1383
cached = set(cached_offsets)
1385
index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1387
def prepare_index(self, index, node_ref_lists, key_length, key_count,
1388
row_lengths, cached_offsets):
1389
"""Setup the BTreeGraphIndex with some pre-canned information."""
1390
index.node_ref_lists = node_ref_lists
1391
index._key_length = key_length
1392
index._key_count = key_count
1393
index._row_lengths = row_lengths
1394
index._compute_row_offsets()
1395
index._root_node = btree_index._InternalNode(b'internal\noffset=0\n')
1396
self.set_cached_offsets(index, cached_offsets)
1398
def make_100_node_index(self):
1399
index = self.make_index(4096*100, 6)
1400
# Consider we've already made a single request at the middle
1401
self.prepare_index(index, node_ref_lists=0, key_length=1,
1402
key_count=1000, row_lengths=[1, 99],
1403
cached_offsets=[0, 50])
1406
def make_1000_node_index(self):
1407
index = self.make_index(4096*1000, 6)
1408
# Pretend we've already made a single request in the middle
1409
self.prepare_index(index, node_ref_lists=0, key_length=1,
1410
key_count=90000, row_lengths=[1, 9, 990],
1411
cached_offsets=[0, 5, 500])
1414
def assertNumPages(self, expected_pages, index, size):
1416
self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1418
def assertExpandOffsets(self, expected, index, offsets):
1419
self.assertEqual(expected, index._expand_offsets(offsets),
1420
'We did not get the expected value after expanding'
1423
def test_default_recommended_pages(self):
1424
index = self.make_index(None)
1425
# local transport recommends 4096 byte reads, which is 1 page
1426
self.assertEqual(1, index._recommended_pages)
1428
def test__compute_total_pages_in_index(self):
1429
index = self.make_index(None)
1430
self.assertNumPages(1, index, 1024)
1431
self.assertNumPages(1, index, 4095)
1432
self.assertNumPages(1, index, 4096)
1433
self.assertNumPages(2, index, 4097)
1434
self.assertNumPages(2, index, 8192)
1435
self.assertNumPages(76, index, 4096*75 + 10)
1437
def test__find_layer_start_and_stop(self):
1438
index = self.make_1000_node_index()
1439
self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1440
self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1441
self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1442
self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1443
self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1444
self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1446
def test_unknown_size(self):
1447
# We should not expand if we don't know the file size
1448
index = self.make_index(None, 10)
1449
self.assertExpandOffsets([0], index, [0])
1450
self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1452
def test_more_than_recommended(self):
1453
index = self.make_index(4096*100, 2)
1454
self.assertExpandOffsets([1, 10], index, [1, 10])
1455
self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1457
def test_read_all_from_root(self):
1458
index = self.make_index(4096*10, 20)
1459
self.assertExpandOffsets(list(range(10)), index, [0])
1461
def test_read_all_when_cached(self):
1462
# We've read enough that we can grab all the rest in a single request
1463
index = self.make_index(4096*10, 5)
1464
self.prepare_index(index, node_ref_lists=0, key_length=1,
1465
key_count=1000, row_lengths=[1, 9],
1466
cached_offsets=[0, 1, 2, 5, 6])
1467
# It should fill the remaining nodes, regardless of the one requested
1468
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1469
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1470
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1472
def test_no_root_node(self):
1473
index = self.make_index(4096*10, 5)
1474
self.assertExpandOffsets([0], index, [0])
1476
def test_include_neighbors(self):
1477
index = self.make_100_node_index()
1478
# We expand in both directions, until we have at least 'recommended'
1480
self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1481
self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1482
# If we hit an 'edge' we continue in the other direction
1483
self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1484
self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1486
# Requesting many nodes will expand all locations equally
1487
self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1488
self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1491
def test_stop_at_cached(self):
1492
index = self.make_100_node_index()
1493
self.set_cached_offsets(index, [0, 10, 19])
1494
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1495
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1496
self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1497
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1498
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1499
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1501
def test_cannot_fully_expand(self):
1502
index = self.make_100_node_index()
1503
self.set_cached_offsets(index, [0, 10, 12])
1504
# We don't go into an endless loop if we are bound by cached nodes
1505
self.assertExpandOffsets([11], index, [11])
1507
def test_overlap(self):
1508
index = self.make_100_node_index()
1509
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1510
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1512
def test_stay_within_layer(self):
1513
index = self.make_1000_node_index()
1514
# When expanding a request, we won't read nodes from the next layer
1515
self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1516
self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1517
self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1518
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1519
self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1521
self.set_cached_offsets(index, [0, 4, 12])
1522
self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1523
self.assertExpandOffsets([10, 11], index, [11])
1525
def test_small_requests_unexpanded(self):
1526
index = self.make_100_node_index()
1527
self.set_cached_offsets(index, [0])
1528
self.assertExpandOffsets([1], index, [1])
1529
self.assertExpandOffsets([50], index, [50])
1530
# If we request more than one node, then we'll expand
1531
self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1533
# The first pass does not expand
1534
index = self.make_1000_node_index()
1535
self.set_cached_offsets(index, [0])
1536
self.assertExpandOffsets([1], index, [1])
1537
self.set_cached_offsets(index, [0, 1])
1538
self.assertExpandOffsets([100], index, [100])
1539
self.set_cached_offsets(index, [0, 1, 100])
1540
# But after the first depth, we will expand
1541
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1542
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1543
self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1544
self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,