1
# Copyright (C) 2008-2012, 2016 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
"""Tests for btree indices."""
36
TestCaseWithTransport,
44
load_tests = scenarios.load_tests_apply_scenarios
47
def btreeparser_scenarios():
    """Return load_tests scenarios for each available btree parser.

    Always includes the pure-Python serializer; adds the compiled
    extension module as a second scenario when it is importable.
    """
    import breezy.bzr._btree_serializer_py as py_module
    scenarios = [('python', {'parse_btree': py_module})]
    if compiled_btreeparser_feature.available():
        scenarios.append(('C',
                          {'parse_btree': compiled_btreeparser_feature.module}))
    return scenarios
56
# Probe for the compiled (Cython/Pyrex) btree serializer; consulted by
# btreeparser_scenarios() to decide whether the 'C' scenario can run.
compiled_btreeparser_feature = features.ModuleAvailableFeature(
    'breezy.bzr._btree_serializer_pyx')
60
class BTreeTestCase(TestCaseWithTransport):
61
# test names here are suffixed by the key length and reference list count
65
super(BTreeTestCase, self).setUp()
66
self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
68
def make_nodes(self, count, key_elements, reference_lists):
69
"""Generate count*key_elements sample nodes."""
70
def _pos_to_key(pos, lead=b""):
71
return (lead + (b"%d" % pos) * 40,)
73
for prefix_pos in range(key_elements):
75
prefix = _pos_to_key(prefix_pos)
78
for pos in range(count):
79
# TODO: This creates odd keys. When count == 100,000, it
80
# creates a 240 byte key
81
key = prefix + _pos_to_key(pos)
82
value = b"value:%d" % pos
84
# generate some references
86
for list_pos in range(reference_lists):
87
# as many keys in each list as its index + the key depth
88
# mod 2 - this generates both 0 length lists and
89
# ones slightly longer than the number of lists.
90
# It also ensures we have non homogeneous lists.
92
for ref_pos in range(list_pos + pos % 2):
94
# refer to a nearby key
95
refs[-1].append(prefix + _pos_to_key(pos - 1, b"ref"))
97
# serial of this ref in the ref list
98
refs[-1].append(prefix + _pos_to_key(ref_pos, b"ref"))
99
refs[-1] = tuple(refs[-1])
103
keys.append((key, value, refs))
106
def shrink_page_size(self):
    """Shrink the default page size so that less fits in a page."""
    # overrideAttr with no replacement value records the current value so
    # it is restored after the test; the new value is assigned below.
    self.overrideAttr(btree_index, '_PAGE_SIZE')
    btree_index._PAGE_SIZE = 2048
111
def assertEqualApproxCompressed(self, expected, actual, slop=6):
112
"""Check a count of compressed bytes is approximately as expected
114
Relying on compressed length being stable even with fixed inputs is
115
slightly bogus, but zlib is stable enough that this mostly works.
117
if not expected - slop < actual < expected + slop:
118
self.fail("Expected around %d bytes compressed but got %d" %
122
class TestBTreeBuilder(BTreeTestCase):
124
def test_clear_cache(self):
    """clear_cache on a builder must exist and be callable."""
    builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
    # This is a no-op, but we need the api to be consistent with other
    # BTreeGraphIndex apis.
    builder.clear_cache()
130
def test_empty_1_0(self):
131
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
132
# NamedTemporaryFile dies on builder.finish().read(). weird.
133
temp_file = builder.finish()
134
content = temp_file.read()
137
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
141
def test_empty_2_1(self):
142
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
143
# NamedTemporaryFile dies on builder.finish().read(). weird.
144
temp_file = builder.finish()
145
content = temp_file.read()
148
b"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
152
def test_root_leaf_1_0(self):
153
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
154
nodes = self.make_nodes(5, 1, 0)
156
builder.add_node(*node)
157
# NamedTemporaryFile dies on builder.finish().read(). weird.
158
temp_file = builder.finish()
159
content = temp_file.read()
161
self.assertEqual(131, len(content))
163
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
166
node_content = content[73:]
167
node_bytes = zlib.decompress(node_content)
168
expected_node = (b"type=leaf\n"
169
b"0000000000000000000000000000000000000000\x00\x00value:0\n"
170
b"1111111111111111111111111111111111111111\x00\x00value:1\n"
171
b"2222222222222222222222222222222222222222\x00\x00value:2\n"
172
b"3333333333333333333333333333333333333333\x00\x00value:3\n"
173
b"4444444444444444444444444444444444444444\x00\x00value:4\n")
174
self.assertEqual(expected_node, node_bytes)
176
def test_root_leaf_2_2(self):
177
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
178
nodes = self.make_nodes(5, 2, 2)
180
builder.add_node(*node)
181
# NamedTemporaryFile dies on builder.finish().read(). weird.
182
temp_file = builder.finish()
183
content = temp_file.read()
185
self.assertEqual(238, len(content))
187
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
190
node_content = content[74:]
191
node_bytes = zlib.decompress(node_content)
194
b"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
195
b"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
196
b"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
197
b"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
198
b"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
199
b"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
200
b"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
201
b"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
202
b"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
203
b"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
206
self.assertEqual(expected_node, node_bytes)
208
def test_2_leaves_1_0(self):
209
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
210
nodes = self.make_nodes(400, 1, 0)
212
builder.add_node(*node)
213
# NamedTemporaryFile dies on builder.finish().read(). weird.
214
temp_file = builder.finish()
215
content = temp_file.read()
217
self.assertEqualApproxCompressed(9283, len(content))
219
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
220
b"row_lengths=1,2\n",
222
root = content[77:4096]
223
leaf1 = content[4096:8192]
224
leaf2 = content[8192:]
225
root_bytes = zlib.decompress(root)
229
) + (b"307" * 40) + b"\n"
230
self.assertEqual(expected_root, root_bytes)
231
# We already know serialisation works for leaves, check key selection:
232
leaf1_bytes = zlib.decompress(leaf1)
233
sorted_node_keys = sorted(node[0] for node in nodes)
234
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
235
self.assertEqual(231, len(node))
236
self.assertEqual(sorted_node_keys[:231], node.all_keys())
237
leaf2_bytes = zlib.decompress(leaf2)
238
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
239
self.assertEqual(400 - 231, len(node))
240
self.assertEqual(sorted_node_keys[231:], node.all_keys())
242
def test_last_page_rounded_1_layer(self):
243
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
244
nodes = self.make_nodes(10, 1, 0)
246
builder.add_node(*node)
247
# NamedTemporaryFile dies on builder.finish().read(). weird.
248
temp_file = builder.finish()
249
content = temp_file.read()
251
self.assertEqualApproxCompressed(155, len(content))
253
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
256
# Check the last page is well formed
258
leaf2_bytes = zlib.decompress(leaf2)
259
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
260
self.assertEqual(10, len(node))
261
sorted_node_keys = sorted(node[0] for node in nodes)
262
self.assertEqual(sorted_node_keys, node.all_keys())
264
def test_last_page_not_rounded_2_layer(self):
265
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
266
nodes = self.make_nodes(400, 1, 0)
268
builder.add_node(*node)
269
# NamedTemporaryFile dies on builder.finish().read(). weird.
270
temp_file = builder.finish()
271
content = temp_file.read()
273
self.assertEqualApproxCompressed(9283, len(content))
275
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
276
b"row_lengths=1,2\n",
278
# Check the last page is well formed
279
leaf2 = content[8192:]
280
leaf2_bytes = zlib.decompress(leaf2)
281
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
282
self.assertEqual(400 - 231, len(node))
283
sorted_node_keys = sorted(node[0] for node in nodes)
284
self.assertEqual(sorted_node_keys[231:], node.all_keys())
286
def test_three_level_tree_details(self):
287
# The left most pointer in the second internal node in a row should
288
# point to the second node that the internal node is for, _not_
289
# the first, otherwise the first node overlaps with the last node of
290
# the prior internal node on that row.
291
self.shrink_page_size()
292
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
293
# 40K nodes is enough to create a two internal nodes on the second
294
# level, with a 2K page size
295
nodes = self.make_nodes(20000, 2, 2)
298
builder.add_node(*node)
299
t = transport.get_transport_from_url('trace+' + self.get_url(''))
300
size = t.put_file('index', self.time(builder.finish))
302
index = btree_index.BTreeGraphIndex(t, 'index', size)
303
# Seed the metadata, we're using internal calls now.
305
self.assertEqual(3, len(index._row_lengths),
306
"Not enough rows: %r" % index._row_lengths)
307
self.assertEqual(4, len(index._row_offsets))
308
self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
309
internal_nodes = index._get_internal_nodes([0, 1, 2])
310
root_node = internal_nodes[0]
311
internal_node1 = internal_nodes[1]
312
internal_node2 = internal_nodes[2]
313
# The left most node node2 points at should be one after the right most
314
# node pointed at by node1.
315
self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
316
# The left most key of the second node pointed at by internal_node2
317
# should be its first key. We can check this by looking for its first key
318
# in the second node it points at
319
pos = index._row_offsets[2] + internal_node2.offset + 1
320
leaf = index._get_leaf_nodes([pos])[pos]
321
self.assertTrue(internal_node2.keys[0] in leaf)
323
def test_2_leaves_2_2(self):
324
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
325
nodes = self.make_nodes(100, 2, 2)
327
builder.add_node(*node)
328
# NamedTemporaryFile dies on builder.finish().read(). weird.
329
temp_file = builder.finish()
330
content = temp_file.read()
332
self.assertEqualApproxCompressed(12643, len(content))
334
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
335
b"row_lengths=1,3\n",
337
root = content[77:4096]
338
leaf1 = content[4096:8192]
339
leaf2 = content[8192:12288]
340
leaf3 = content[12288:]
341
root_bytes = zlib.decompress(root)
345
+ (b"0" * 40) + b"\x00" + (b"91" * 40) + b"\n"
346
+ (b"1" * 40) + b"\x00" + (b"81" * 40) + b"\n"
348
self.assertEqual(expected_root, root_bytes)
349
# We assume the other leaf nodes have been written correctly - layering
352
def test_spill_index_stress_1_1(self):
353
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
354
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
355
builder.add_node(*nodes[0])
356
# Test the parts of the index that take up memory are doing so
358
self.assertEqual(1, len(builder._nodes))
359
self.assertIs(None, builder._nodes_by_key)
360
builder.add_node(*nodes[1])
361
self.assertEqual(0, len(builder._nodes))
362
self.assertIs(None, builder._nodes_by_key)
363
self.assertEqual(1, len(builder._backing_indices))
364
self.assertEqual(2, builder._backing_indices[0].key_count())
366
builder.add_node(*nodes[2])
367
self.assertEqual(1, len(builder._nodes))
368
self.assertIs(None, builder._nodes_by_key)
369
# And spills to a second backing index combining all
370
builder.add_node(*nodes[3])
371
self.assertEqual(0, len(builder._nodes))
372
self.assertIs(None, builder._nodes_by_key)
373
self.assertEqual(2, len(builder._backing_indices))
374
self.assertEqual(None, builder._backing_indices[0])
375
self.assertEqual(4, builder._backing_indices[1].key_count())
376
# The next spills to the 2-len slot
377
builder.add_node(*nodes[4])
378
builder.add_node(*nodes[5])
379
self.assertEqual(0, len(builder._nodes))
380
self.assertIs(None, builder._nodes_by_key)
381
self.assertEqual(2, len(builder._backing_indices))
382
self.assertEqual(2, builder._backing_indices[0].key_count())
383
self.assertEqual(4, builder._backing_indices[1].key_count())
384
# Next spill combines
385
builder.add_node(*nodes[6])
386
builder.add_node(*nodes[7])
387
self.assertEqual(3, len(builder._backing_indices))
388
self.assertEqual(None, builder._backing_indices[0])
389
self.assertEqual(None, builder._backing_indices[1])
390
self.assertEqual(8, builder._backing_indices[2].key_count())
391
# And so forth - counting up in binary.
392
builder.add_node(*nodes[8])
393
builder.add_node(*nodes[9])
394
self.assertEqual(3, len(builder._backing_indices))
395
self.assertEqual(2, builder._backing_indices[0].key_count())
396
self.assertEqual(None, builder._backing_indices[1])
397
self.assertEqual(8, builder._backing_indices[2].key_count())
398
builder.add_node(*nodes[10])
399
builder.add_node(*nodes[11])
400
self.assertEqual(3, len(builder._backing_indices))
401
self.assertEqual(None, builder._backing_indices[0])
402
self.assertEqual(4, builder._backing_indices[1].key_count())
403
self.assertEqual(8, builder._backing_indices[2].key_count())
404
builder.add_node(*nodes[12])
405
# Test that memory and disk are both used for query methods; and that
406
# None is skipped over happily.
407
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
408
list(builder.iter_all_entries()))
409
# Two nodes - one memory one disk
410
self.assertEqual({(builder,) + node for node in nodes[11:13]},
411
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
412
self.assertEqual(13, builder.key_count())
413
self.assertEqual({(builder,) + node for node in nodes[11:13]},
414
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
415
builder.add_node(*nodes[13])
416
self.assertEqual(3, len(builder._backing_indices))
417
self.assertEqual(2, builder._backing_indices[0].key_count())
418
self.assertEqual(4, builder._backing_indices[1].key_count())
419
self.assertEqual(8, builder._backing_indices[2].key_count())
420
builder.add_node(*nodes[14])
421
builder.add_node(*nodes[15])
422
self.assertEqual(4, len(builder._backing_indices))
423
self.assertEqual(None, builder._backing_indices[0])
424
self.assertEqual(None, builder._backing_indices[1])
425
self.assertEqual(None, builder._backing_indices[2])
426
self.assertEqual(16, builder._backing_indices[3].key_count())
427
# Now finish, and check we got a correctly ordered tree
428
t = self.get_transport('')
429
size = t.put_file('index', builder.finish())
430
index = btree_index.BTreeGraphIndex(t, 'index', size)
431
nodes = list(index.iter_all_entries())
432
self.assertEqual(sorted(nodes), nodes)
433
self.assertEqual(16, len(nodes))
435
def test_spill_index_stress_1_1_no_combine(self):
436
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
437
builder.set_optimize(for_size=False, combine_backing_indices=False)
438
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
439
builder.add_node(*nodes[0])
440
# Test the parts of the index that take up memory are doing so
442
self.assertEqual(1, len(builder._nodes))
443
self.assertIs(None, builder._nodes_by_key)
444
builder.add_node(*nodes[1])
445
self.assertEqual(0, len(builder._nodes))
446
self.assertIs(None, builder._nodes_by_key)
447
self.assertEqual(1, len(builder._backing_indices))
448
self.assertEqual(2, builder._backing_indices[0].key_count())
450
builder.add_node(*nodes[2])
451
self.assertEqual(1, len(builder._nodes))
452
self.assertIs(None, builder._nodes_by_key)
453
# And spills to a second backing index but doesn't combine
454
builder.add_node(*nodes[3])
455
self.assertEqual(0, len(builder._nodes))
456
self.assertIs(None, builder._nodes_by_key)
457
self.assertEqual(2, len(builder._backing_indices))
458
for backing_index in builder._backing_indices:
459
self.assertEqual(2, backing_index.key_count())
460
# The next spills to the 3rd slot
461
builder.add_node(*nodes[4])
462
builder.add_node(*nodes[5])
463
self.assertEqual(0, len(builder._nodes))
464
self.assertIs(None, builder._nodes_by_key)
465
self.assertEqual(3, len(builder._backing_indices))
466
for backing_index in builder._backing_indices:
467
self.assertEqual(2, backing_index.key_count())
468
# Now spill a few more, and check that we don't combine
469
builder.add_node(*nodes[6])
470
builder.add_node(*nodes[7])
471
builder.add_node(*nodes[8])
472
builder.add_node(*nodes[9])
473
builder.add_node(*nodes[10])
474
builder.add_node(*nodes[11])
475
builder.add_node(*nodes[12])
476
self.assertEqual(6, len(builder._backing_indices))
477
for backing_index in builder._backing_indices:
478
self.assertEqual(2, backing_index.key_count())
479
# Test that memory and disk are both used for query methods; and that
480
# None is skipped over happily.
481
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
482
list(builder.iter_all_entries()))
483
# Two nodes - one memory one disk
484
self.assertEqual({(builder,) + node for node in nodes[11:13]},
485
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
486
self.assertEqual(13, builder.key_count())
487
self.assertEqual({(builder,) + node for node in nodes[11:13]},
488
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
489
builder.add_node(*nodes[13])
490
builder.add_node(*nodes[14])
491
builder.add_node(*nodes[15])
492
self.assertEqual(8, len(builder._backing_indices))
493
for backing_index in builder._backing_indices:
494
self.assertEqual(2, backing_index.key_count())
495
# Now finish, and check we got a correctly ordered tree
496
transport = self.get_transport('')
497
size = transport.put_file('index', builder.finish())
498
index = btree_index.BTreeGraphIndex(transport, 'index', size)
499
nodes = list(index.iter_all_entries())
500
self.assertEqual(sorted(nodes), nodes)
501
self.assertEqual(16, len(nodes))
503
def test_set_optimize(self):
504
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
505
builder.set_optimize(for_size=True)
506
self.assertTrue(builder._optimize_for_size)
507
builder.set_optimize(for_size=False)
508
self.assertFalse(builder._optimize_for_size)
509
# test that we can set combine_backing_indices without affecting
512
builder._optimize_for_size = obj
513
builder.set_optimize(combine_backing_indices=False)
514
self.assertFalse(builder._combine_backing_indices)
515
self.assertIs(obj, builder._optimize_for_size)
516
builder.set_optimize(combine_backing_indices=True)
517
self.assertTrue(builder._combine_backing_indices)
518
self.assertIs(obj, builder._optimize_for_size)
520
def test_spill_index_stress_2_2(self):
521
# test that references and longer keys don't confuse things.
522
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
524
nodes = self.make_nodes(16, 2, 2)
525
builder.add_node(*nodes[0])
526
# Test the parts of the index that take up memory are doing so
528
self.assertEqual(1, len(builder._nodes))
529
self.assertIs(None, builder._nodes_by_key)
530
builder.add_node(*nodes[1])
531
self.assertEqual(0, len(builder._nodes))
532
self.assertIs(None, builder._nodes_by_key)
533
self.assertEqual(1, len(builder._backing_indices))
534
self.assertEqual(2, builder._backing_indices[0].key_count())
536
old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
537
builder.add_node(*nodes[2])
538
self.assertEqual(1, len(builder._nodes))
539
self.assertIsNot(None, builder._nodes_by_key)
540
self.assertNotEqual({}, builder._nodes_by_key)
541
# We should have a new entry
542
self.assertNotEqual(old, builder._nodes_by_key)
543
# And spills to a second backing index combining all
544
builder.add_node(*nodes[3])
545
self.assertEqual(0, len(builder._nodes))
546
self.assertIs(None, builder._nodes_by_key)
547
self.assertEqual(2, len(builder._backing_indices))
548
self.assertEqual(None, builder._backing_indices[0])
549
self.assertEqual(4, builder._backing_indices[1].key_count())
550
# The next spills to the 2-len slot
551
builder.add_node(*nodes[4])
552
builder.add_node(*nodes[5])
553
self.assertEqual(0, len(builder._nodes))
554
self.assertIs(None, builder._nodes_by_key)
555
self.assertEqual(2, len(builder._backing_indices))
556
self.assertEqual(2, builder._backing_indices[0].key_count())
557
self.assertEqual(4, builder._backing_indices[1].key_count())
558
# Next spill combines
559
builder.add_node(*nodes[6])
560
builder.add_node(*nodes[7])
561
self.assertEqual(3, len(builder._backing_indices))
562
self.assertEqual(None, builder._backing_indices[0])
563
self.assertEqual(None, builder._backing_indices[1])
564
self.assertEqual(8, builder._backing_indices[2].key_count())
565
# And so forth - counting up in binary.
566
builder.add_node(*nodes[8])
567
builder.add_node(*nodes[9])
568
self.assertEqual(3, len(builder._backing_indices))
569
self.assertEqual(2, builder._backing_indices[0].key_count())
570
self.assertEqual(None, builder._backing_indices[1])
571
self.assertEqual(8, builder._backing_indices[2].key_count())
572
builder.add_node(*nodes[10])
573
builder.add_node(*nodes[11])
574
self.assertEqual(3, len(builder._backing_indices))
575
self.assertEqual(None, builder._backing_indices[0])
576
self.assertEqual(4, builder._backing_indices[1].key_count())
577
self.assertEqual(8, builder._backing_indices[2].key_count())
578
builder.add_node(*nodes[12])
579
# Test that memory and disk are both used for query methods; and that
580
# None is skipped over happily.
581
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
582
list(builder.iter_all_entries()))
583
# Two nodes - one memory one disk
584
self.assertEqual({(builder,) + node for node in nodes[11:13]},
585
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
586
self.assertEqual(13, builder.key_count())
587
self.assertEqual({(builder,) + node for node in nodes[11:13]},
588
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
589
builder.add_node(*nodes[13])
590
self.assertEqual(3, len(builder._backing_indices))
591
self.assertEqual(2, builder._backing_indices[0].key_count())
592
self.assertEqual(4, builder._backing_indices[1].key_count())
593
self.assertEqual(8, builder._backing_indices[2].key_count())
594
builder.add_node(*nodes[14])
595
builder.add_node(*nodes[15])
596
self.assertEqual(4, len(builder._backing_indices))
597
self.assertEqual(None, builder._backing_indices[0])
598
self.assertEqual(None, builder._backing_indices[1])
599
self.assertEqual(None, builder._backing_indices[2])
600
self.assertEqual(16, builder._backing_indices[3].key_count())
601
# Now finish, and check we got a correctly ordered tree
602
transport = self.get_transport('')
603
size = transport.put_file('index', builder.finish())
604
index = btree_index.BTreeGraphIndex(transport, 'index', size)
605
nodes = list(index.iter_all_entries())
606
self.assertEqual(sorted(nodes), nodes)
607
self.assertEqual(16, len(nodes))
609
def test_spill_index_duplicate_key_caught_on_finish(self):
    """A duplicate key added after a spill is only detected by finish()."""
    builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
    nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
    builder.add_node(*nodes[0])
    builder.add_node(*nodes[1])
    # nodes[0] and nodes[1] have spilled to a backing index, so re-adding
    # nodes[0] is not rejected immediately — finish() must catch it.
    builder.add_node(*nodes[0])
    self.assertRaises(_mod_index.BadIndexDuplicateKey, builder.finish)
618
class TestBTreeIndex(BTreeTestCase):
620
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
    """Build a btree index containing ``nodes`` and reopen it for reading.

    :param ref_lists: Number of reference lists per node.
    :param key_elements: Number of elements in each key tuple.
    :param nodes: Iterable of (key, value, references) tuples to index.
        (The default list is never mutated, so the shared default is safe.)
    :return: A BTreeGraphIndex over a trace transport, so tests can inspect
        the transport activity.
    """
    builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
                                       key_elements=key_elements)
    for key, value, references in nodes:
        builder.add_node(key, value, references)
    stream = builder.finish()
    trans = transport.get_transport_from_url('trace+' + self.get_url())
    size = trans.put_file('index', stream)
    return btree_index.BTreeGraphIndex(trans, 'index', size)
630
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
632
builder = btree_index.BTreeBuilder(key_elements=key_elements,
633
reference_lists=ref_lists)
634
builder.add_nodes(nodes)
635
transport = self.get_transport('')
636
# NamedTemporaryFile dies on builder.finish().read(). weird.
637
temp_file = builder.finish()
638
content = temp_file.read()
641
transport.put_bytes('index', (b' '*offset)+content)
642
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
645
def test_clear_cache(self):
646
nodes = self.make_nodes(160, 2, 2)
647
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
648
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
649
self.assertEqual([1, 4], index._row_lengths)
650
self.assertIsNot(None, index._root_node)
651
internal_node_pre_clear = set(index._internal_node_cache)
652
self.assertTrue(len(index._leaf_node_cache) > 0)
654
# We don't touch _root_node or _internal_node_cache, both should be
655
# small, and can save a round trip or two
656
self.assertIsNot(None, index._root_node)
657
# NOTE: We don't want to affect the _internal_node_cache, as we expect
658
# it will be small, and if we ever do touch this index again, it
659
# will save round-trips. This assertion isn't very strong,
660
# because without a 3-level index, we don't have any internal
662
self.assertEqual(internal_node_pre_clear,
663
set(index._internal_node_cache))
664
self.assertEqual(0, len(index._leaf_node_cache))
666
def test_trivial_constructor(self):
    """Constructing with no size must not touch the transport."""
    t = transport.get_transport_from_url('trace+' + self.get_url(''))
    index = btree_index.BTreeGraphIndex(t, 'index', None)
    # Checks the page size at load, but that isn't logged yet.
    self.assertEqual([], t._activity)
672
def test_with_size_constructor(self):
    """Constructing with a size must not touch the transport either."""
    t = transport.get_transport_from_url('trace+' + self.get_url(''))
    index = btree_index.BTreeGraphIndex(t, 'index', 1)
    # Checks the page size at load, but that isn't logged yet.
    self.assertEqual([], t._activity)
678
def test_empty_key_count_no_size(self):
679
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
680
t = transport.get_transport_from_url('trace+' + self.get_url(''))
681
t.put_file('index', builder.finish())
682
index = btree_index.BTreeGraphIndex(t, 'index', None)
684
self.assertEqual([], t._activity)
685
self.assertEqual(0, index.key_count())
686
# The entire index should have been requested (as we generally have the
687
# size available, and doing many small readvs is inappropriate).
688
# We can't tell how much was actually read here, but - check the code.
689
self.assertEqual([('get', 'index')], t._activity)
691
def test_empty_key_count(self):
692
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
693
t = transport.get_transport_from_url('trace+' + self.get_url(''))
694
size = t.put_file('index', builder.finish())
695
self.assertEqual(72, size)
696
index = btree_index.BTreeGraphIndex(t, 'index', size)
698
self.assertEqual([], t._activity)
699
self.assertEqual(0, index.key_count())
700
# The entire index should have been read, as 4K > size
701
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
704
def test_non_empty_key_count_2_2(self):
705
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
706
nodes = self.make_nodes(35, 2, 2)
708
builder.add_node(*node)
709
t = transport.get_transport_from_url('trace+' + self.get_url(''))
710
size = t.put_file('index', builder.finish())
711
index = btree_index.BTreeGraphIndex(t, 'index', size)
713
self.assertEqual([], t._activity)
714
self.assertEqual(70, index.key_count())
715
# The entire index should have been read, as it is one page long.
716
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
718
self.assertEqualApproxCompressed(1173, size)
720
def test_with_offset_no_size(self):
721
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
723
nodes=self.make_nodes(200, 1, 1))
724
index._size = None # throw away the size info
725
self.assertEqual(200, index.key_count())
727
def test_with_small_offset(self):
728
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
730
nodes=self.make_nodes(200, 1, 1))
731
self.assertEqual(200, index.key_count())
733
def test_with_large_offset(self):
734
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
736
nodes=self.make_nodes(200, 1, 1))
737
self.assertEqual(200, index.key_count())
739
def test__read_nodes_no_size_one_page_reads_once(self):
740
self.make_index(nodes=[((b'key',), b'value', ())])
741
trans = transport.get_transport_from_url('trace+' + self.get_url())
742
index = btree_index.BTreeGraphIndex(trans, 'index', None)
743
del trans._activity[:]
744
nodes = dict(index._read_nodes([0]))
745
self.assertEqual({0}, set(nodes))
747
self.assertEqual([(b'key',)], node.all_keys())
748
self.assertEqual([('get', 'index')], trans._activity)
750
def test__read_nodes_no_size_multiple_pages(self):
751
index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
753
num_pages = index._row_offsets[-1]
754
# Reopen with a traced transport and no size
755
trans = transport.get_transport_from_url('trace+' + self.get_url())
756
index = btree_index.BTreeGraphIndex(trans, 'index', None)
757
del trans._activity[:]
758
nodes = dict(index._read_nodes([0]))
759
self.assertEqual(list(range(num_pages)), sorted(nodes))
761
def test_2_levels_key_count_2_2(self):
762
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
763
nodes = self.make_nodes(160, 2, 2)
765
builder.add_node(*node)
766
t = transport.get_transport_from_url('trace+' + self.get_url(''))
767
size = t.put_file('index', builder.finish())
768
self.assertEqualApproxCompressed(17692, size)
769
index = btree_index.BTreeGraphIndex(t, 'index', size)
771
self.assertEqual([], t._activity)
772
self.assertEqual(320, index.key_count())
773
# The entire index should not have been read.
774
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
777
def test_validate_one_page(self):
778
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
779
nodes = self.make_nodes(45, 2, 2)
781
builder.add_node(*node)
782
t = transport.get_transport_from_url('trace+' + self.get_url(''))
783
size = t.put_file('index', builder.finish())
784
index = btree_index.BTreeGraphIndex(t, 'index', size)
786
self.assertEqual([], t._activity)
788
# The entire index should have been read linearly.
789
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
791
self.assertEqualApproxCompressed(1488, size)
793
def test_validate_two_pages(self):
794
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
795
nodes = self.make_nodes(80, 2, 2)
797
builder.add_node(*node)
798
t = transport.get_transport_from_url('trace+' + self.get_url(''))
799
size = t.put_file('index', builder.finish())
800
# Root page, 2 leaf pages
801
self.assertEqualApproxCompressed(9339, size)
802
index = btree_index.BTreeGraphIndex(t, 'index', size)
804
self.assertEqual([], t._activity)
806
rem = size - 8192 # Number of remaining bytes after second block
807
# The entire index should have been read linearly.
809
[('readv', 'index', [(0, 4096)], False, None),
810
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
812
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
813
# node and make validate find them.
815
def test_eq_ne(self):
816
# two indices are equal when constructed with the same parameters:
817
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
818
t2 = self.get_transport()
820
btree_index.BTreeGraphIndex(t1, 'index', None) ==
821
btree_index.BTreeGraphIndex(t1, 'index', None))
823
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
824
btree_index.BTreeGraphIndex(t1, 'index', 20))
826
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
827
btree_index.BTreeGraphIndex(t2, 'index', 20))
829
btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
830
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
832
btree_index.BTreeGraphIndex(t1, 'index', 10) ==
833
btree_index.BTreeGraphIndex(t1, 'index', 20))
835
btree_index.BTreeGraphIndex(t1, 'index', None) !=
836
btree_index.BTreeGraphIndex(t1, 'index', None))
838
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
839
btree_index.BTreeGraphIndex(t1, 'index', 20))
841
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
842
btree_index.BTreeGraphIndex(t2, 'index', 20))
844
btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
845
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
847
btree_index.BTreeGraphIndex(t1, 'index', 10) !=
848
btree_index.BTreeGraphIndex(t1, 'index', 20))
850
def test_key_too_big(self):
851
# the size that matters here is the _compressed_ size of the key, so we can't
852
# do a simple character repeat.
853
bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE))
854
self.assertRaises(_mod_index.BadIndexKey,
856
nodes=[((bigKey,), b'value', ())])
858
def test_iter_all_only_root_no_size(self):
859
self.make_index(nodes=[((b'key',), b'value', ())])
860
t = transport.get_transport_from_url('trace+' + self.get_url(''))
861
index = btree_index.BTreeGraphIndex(t, 'index', None)
863
self.assertEqual([((b'key',), b'value')],
864
[x[1:] for x in index.iter_all_entries()])
865
self.assertEqual([('get', 'index')], t._activity)
867
def test_iter_all_entries_reads(self):
868
# iterating all entries reads the header, then does a linear
870
self.shrink_page_size()
871
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
872
# 20k nodes is enough to create a two internal nodes on the second
873
# level, with a 2K page size
874
nodes = self.make_nodes(10000, 2, 2)
876
builder.add_node(*node)
877
t = transport.get_transport_from_url('trace+' + self.get_url(''))
878
size = t.put_file('index', builder.finish())
879
page_size = btree_index._PAGE_SIZE
881
index = btree_index.BTreeGraphIndex(t, 'index', size)
883
self.assertEqual([], t._activity)
884
found_nodes = self.time(list, index.iter_all_entries())
886
for node in found_nodes:
887
self.assertTrue(node[0] is index)
888
bare_nodes.append(node[1:])
889
self.assertEqual(3, len(index._row_lengths),
890
"Not enough rows: %r" % index._row_lengths)
891
# Should be as long as the nodes we supplied
892
self.assertEqual(20000, len(found_nodes))
893
# Should have the same content
894
self.assertEqual(set(nodes), set(bare_nodes))
895
# Should have done linear scan IO up the index, ignoring
896
# the internal nodes:
897
# The entire index should have been read
898
total_pages = sum(index._row_lengths)
899
self.assertEqual(total_pages, index._row_offsets[-1])
900
self.assertEqualApproxCompressed(1303220, size)
901
# The start of the leaves
902
first_byte = index._row_offsets[-2] * page_size
904
for offset in range(first_byte, size, page_size):
905
readv_request.append((offset, page_size))
906
# The last page is truncated
907
readv_request[-1] = (readv_request[-1][0], size % page_size)
908
expected = [('readv', 'index', [(0, page_size)], False, None),
909
('readv', 'index', readv_request, False, None)]
910
if expected != t._activity:
911
self.assertEqualDiff(pprint.pformat(expected),
912
pprint.pformat(t._activity))
914
def test_iter_entries_references_2_refs_resolved(self):
915
# iterating some entries reads just the pages needed. For now, to
916
# get it working and start measuring, only 4K pages are read.
917
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
918
# 80 nodes is enough to create a two-level index.
919
nodes = self.make_nodes(160, 2, 2)
921
builder.add_node(*node)
922
t = transport.get_transport_from_url('trace+' + self.get_url(''))
923
size = t.put_file('index', builder.finish())
925
index = btree_index.BTreeGraphIndex(t, 'index', size)
927
self.assertEqual([], t._activity)
929
found_nodes = list(index.iter_entries([nodes[30][0]]))
931
for node in found_nodes:
932
self.assertTrue(node[0] is index)
933
bare_nodes.append(node[1:])
934
# Should be as long as the nodes we supplied
935
self.assertEqual(1, len(found_nodes))
936
# Should have the same content
937
self.assertEqual(nodes[30], bare_nodes[0])
938
# Should have read the root node, then one leaf page:
939
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
940
('readv', 'index', [(8192, 4096), ], False, None)],
943
def test_iter_key_prefix_1_element_key_None(self):
944
index = self.make_index()
945
self.assertRaises(_mod_index.BadIndexKey, list,
946
index.iter_entries_prefix([(None, )]))
948
def test_iter_key_prefix_wrong_length(self):
949
index = self.make_index()
950
self.assertRaises(_mod_index.BadIndexKey, list,
951
index.iter_entries_prefix([(b'foo', None)]))
952
index = self.make_index(key_elements=2)
953
self.assertRaises(_mod_index.BadIndexKey, list,
954
index.iter_entries_prefix([(b'foo', )]))
955
self.assertRaises(_mod_index.BadIndexKey, list,
956
index.iter_entries_prefix([(b'foo', None, None)]))
958
def test_iter_key_prefix_1_key_element_no_refs(self):
959
index = self.make_index(nodes=[
960
((b'name', ), b'data', ()),
961
((b'ref', ), b'refdata', ())])
962
self.assertEqual({(index, (b'name', ), b'data'),
963
(index, (b'ref', ), b'refdata')},
964
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
966
def test_iter_key_prefix_1_key_element_refs(self):
967
index = self.make_index(1, nodes=[
968
((b'name', ), b'data', ([(b'ref', )], )),
969
((b'ref', ), b'refdata', ([], ))])
970
self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)),
971
(index, (b'ref', ), b'refdata', ((), ))},
972
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
974
def test_iter_key_prefix_2_key_element_no_refs(self):
975
index = self.make_index(key_elements=2, nodes=[
976
((b'name', b'fin1'), b'data', ()),
977
((b'name', b'fin2'), b'beta', ()),
978
((b'ref', b'erence'), b'refdata', ())])
979
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
980
(index, (b'ref', b'erence'), b'refdata')},
981
set(index.iter_entries_prefix(
982
[(b'name', b'fin1'), (b'ref', b'erence')])))
983
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
984
(index, (b'name', b'fin2'), b'beta')},
985
set(index.iter_entries_prefix([(b'name', None)])))
987
def test_iter_key_prefix_2_key_element_refs(self):
988
index = self.make_index(1, key_elements=2, nodes=[
989
((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )),
990
((b'name', b'fin2'), b'beta', ([], )),
991
((b'ref', b'erence'), b'refdata', ([], ))])
993
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
994
(index, (b'ref', b'erence'), b'refdata', ((), ))},
995
set(index.iter_entries_prefix(
996
[(b'name', b'fin1'), (b'ref', b'erence')])))
998
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
999
(index, (b'name', b'fin2'), b'beta', ((), ))},
1000
set(index.iter_entries_prefix([(b'name', None)])))
1002
# XXX: external_references tests are duplicated in test_index. We
1003
# probably should have per_graph_index tests...
1004
def test_external_references_no_refs(self):
1005
index = self.make_index(ref_lists=0, nodes=[])
1006
self.assertRaises(ValueError, index.external_references, 0)
1008
def test_external_references_no_results(self):
1009
index = self.make_index(ref_lists=1, nodes=[
1010
((b'key',), b'value', ([],))])
1011
self.assertEqual(set(), index.external_references(0))
1013
def test_external_references_missing_ref(self):
1014
missing_key = (b'missing',)
1015
index = self.make_index(ref_lists=1, nodes=[
1016
((b'key',), b'value', ([missing_key],))])
1017
self.assertEqual({missing_key}, index.external_references(0))
1019
def test_external_references_multiple_ref_lists(self):
1020
missing_key = (b'missing',)
1021
index = self.make_index(ref_lists=2, nodes=[
1022
((b'key',), b'value', ([], [missing_key]))])
1023
self.assertEqual(set([]), index.external_references(0))
1024
self.assertEqual({missing_key}, index.external_references(1))
1026
def test_external_references_two_records(self):
1027
index = self.make_index(ref_lists=1, nodes=[
1028
((b'key-1',), b'value', ([(b'key-2',)],)),
1029
((b'key-2',), b'value', ([],)),
1031
self.assertEqual(set([]), index.external_references(0))
1033
def test__find_ancestors_one_page(self):
1036
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1037
(key1, b'value', ([key2],)),
1038
(key2, b'value', ([],)),
1041
missing_keys = set()
1042
search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1043
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1044
self.assertEqual(set(), missing_keys)
1045
self.assertEqual(set(), search_keys)
1047
def test__find_ancestors_one_page_w_missing(self):
1051
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1052
(key1, b'value', ([key2],)),
1053
(key2, b'value', ([],)),
1056
missing_keys = set()
1057
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1059
self.assertEqual({key2: ()}, parent_map)
1060
# we know that key3 is missing because we read the page that it would
1062
self.assertEqual({key3}, missing_keys)
1063
self.assertEqual(set(), search_keys)
1065
def test__find_ancestors_one_parent_missing(self):
1069
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1070
(key1, b'value', ([key2],)),
1071
(key2, b'value', ([key3],)),
1074
missing_keys = set()
1075
search_keys = index._find_ancestors([key1], 0, parent_map,
1077
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1078
self.assertEqual(set(), missing_keys)
1079
# all we know is that key3 wasn't present on the page we were reading
1080
# but if you look, the last key is key2 which comes before key3, so we
1081
# don't know whether key3 would land on this page or not.
1082
self.assertEqual({key3}, search_keys)
1083
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1085
# passing it back in, we are sure it is 'missing'
1086
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1087
self.assertEqual({key3}, missing_keys)
1088
self.assertEqual(set([]), search_keys)
1090
def test__find_ancestors_dont_search_known(self):
1094
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1095
(key1, b'value', ([key2],)),
1096
(key2, b'value', ([key3],)),
1097
(key3, b'value', ([],)),
1099
# We already know about key2, so we won't try to search for key3
1100
parent_map = {key2: (key3,)}
1101
missing_keys = set()
1102
search_keys = index._find_ancestors([key1], 0, parent_map,
1104
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1105
self.assertEqual(set(), missing_keys)
1106
self.assertEqual(set(), search_keys)
1108
def test__find_ancestors_multiple_pages(self):
1109
# We need to use enough keys that we actually cause a split
1110
start_time = 1249671539
1111
email = "joebob@example.com"
1115
for i in range(400):
1116
rev_id = ('%s-%s-%s' % (email,
1117
osutils.compact_date(start_time + i),
1118
osutils.rand_chars(16))).encode('ascii')
1120
nodes.append((rev_key, b'value', ref_lists))
1121
# We have a ref 'list' of length 1, with a list of parents, with 1
1122
# parent which is a key
1123
ref_lists = ((rev_key,),)
1124
rev_keys.append(rev_key)
1125
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1126
self.assertEqual(400, index.key_count())
1127
self.assertEqual(3, len(index._row_offsets))
1128
nodes = dict(index._read_nodes([1, 2]))
1131
min_l2_key = l2.min_key
1132
max_l1_key = l1.max_key
1133
self.assertTrue(max_l1_key < min_l2_key)
1134
parents_min_l2_key = l2[min_l2_key][1][0]
1135
self.assertEqual((l1.max_key,), parents_min_l2_key)
1136
# Now, whatever key we select that would fall on the second page,
1137
# should give us all the parents until the page break
1138
key_idx = rev_keys.index(min_l2_key)
1139
next_key = rev_keys[key_idx+1]
1140
# So now when we get the parent map, we should get the key we are
1141
# looking for, min_l2_key, and then a reference to go look for the
1142
# parent of that key
1144
missing_keys = set()
1145
search_keys = index._find_ancestors([next_key], 0, parent_map,
1147
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1148
self.assertEqual(set(), missing_keys)
1149
self.assertEqual({max_l1_key}, search_keys)
1151
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1153
self.assertEqual(l1.all_keys(), sorted(parent_map))
1154
self.assertEqual(set(), missing_keys)
1155
self.assertEqual(set(), search_keys)
1157
def test__find_ancestors_empty_index(self):
1158
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1160
missing_keys = set()
1161
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1163
self.assertEqual(set(), search_keys)
1164
self.assertEqual({}, parent_map)
1165
self.assertEqual({('one',), ('two',)}, missing_keys)
1167
def test_supports_unlimited_cache(self):
1168
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1169
# We need enough nodes to cause a page split (so we have both an
1170
# internal node and a couple leaf nodes. 500 seems to be enough.)
1171
nodes = self.make_nodes(500, 1, 0)
1173
builder.add_node(*node)
1174
stream = builder.finish()
1175
trans = self.get_transport()
1176
size = trans.put_file('index', stream)
1177
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1178
self.assertEqual(500, index.key_count())
1179
# We have an internal node
1180
self.assertEqual(2, len(index._row_lengths))
1181
# We have at least 2 leaf nodes
1182
self.assertTrue(index._row_lengths[-1] >= 2)
1183
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1184
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1185
index._leaf_node_cache._max_cache)
1186
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1187
self.assertEqual(100, index._internal_node_cache._max_cache)
1188
# No change if unlimited_cache=False is passed
1189
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1190
unlimited_cache=False)
1191
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1192
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1193
index._leaf_node_cache._max_cache)
1194
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1195
self.assertEqual(100, index._internal_node_cache._max_cache)
1196
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1197
unlimited_cache=True)
1198
self.assertIsInstance(index._leaf_node_cache, dict)
1199
self.assertIs(type(index._internal_node_cache), dict)
1200
# Exercise the lookup code
1201
entries = set(index.iter_entries([n[0] for n in nodes]))
1202
self.assertEqual(500, len(entries))
1205
class TestBTreeNodes(BTreeTestCase):
    """Exercise leaf/internal node parsing against both serializers.

    The scenarios swap in the pure-Python and (when available) compiled
    btree serializer via ``parse_btree``.
    """

    scenarios = btreeparser_scenarios()

    def setUp(self):
        super(TestBTreeNodes, self).setUp()
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)

    def test_LeafNode_1_0(self):
        """Parse a leaf with 1-element keys and no reference lists."""
        node_bytes = (b"type=leaf\n"
                      b"0000000000000000000000000000000000000000\x00\x00value:0\n"
                      b"1111111111111111111111111111111111111111\x00\x00value:1\n"
                      b"2222222222222222222222222222222222222222\x00\x00value:2\n"
                      b"3333333333333333333333333333333333333333\x00\x00value:3\n"
                      b"4444444444444444444444444444444444444444\x00\x00value:4\n")
        node = btree_index._LeafNode(node_bytes, 1, 0)
        # We do direct access, or don't care about order, to leaf nodes most
        # of the time, so a dict is useful:
        self.assertEqual({
            (b"0000000000000000000000000000000000000000",): (b"value:0", ()),
            (b"1111111111111111111111111111111111111111",): (b"value:1", ()),
            (b"2222222222222222222222222222222222222222",): (b"value:2", ()),
            (b"3333333333333333333333333333333333333333",): (b"value:3", ()),
            (b"4444444444444444444444444444444444444444",): (b"value:4", ()),
            }, dict(node.all_items()))

    # NOTE(review): the original file defined test_LeafNode_2_2 twice with
    # identical bodies; the first definition was shadowed and never ran.
    # The duplicate has been removed.
    def test_LeafNode_2_2(self):
        """Parse a leaf with 2-element keys and 2 reference lists."""
        node_bytes = (b"type=leaf\n"
                      b"00\x0000\x00\t00\x00ref00\x00value:0\n"
                      b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
                      b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
                      b"11\x0044\x00\t11\x00ref00\x00value:4\n"
                      b""
                      )
        node = btree_index._LeafNode(node_bytes, 2, 2)
        # We do direct access, or don't care about order, to leaf nodes most
        # of the time, so a dict is useful:
        self.assertEqual({
            (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
            (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
                                          ((b'00', b'ref00'),
                                           (b'01', b'ref01')))),
            (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
                                          ((b'11', b'ref22'),
                                           (b'11', b'ref22')))),
            (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
            }, dict(node.all_items()))

    def test_InternalNode_1(self):
        """Parse an internal node and check its key vector and offset."""
        node_bytes = (b"type=internal\n"
                      b"offset=1\n"
                      b"0000000000000000000000000000000000000000\n"
                      b"1111111111111111111111111111111111111111\n"
                      b"2222222222222222222222222222222222222222\n"
                      b"3333333333333333333333333333333333333333\n"
                      b"4444444444444444444444444444444444444444\n"
                      )
        node = btree_index._InternalNode(node_bytes)
        # We want to bisect to find the right children from this node, so a
        # vector is most useful.
        self.assertEqual([
            (b"0000000000000000000000000000000000000000",),
            (b"1111111111111111111111111111111111111111",),
            (b"2222222222222222222222222222222222222222",),
            (b"3333333333333333333333333333333333333333",),
            (b"4444444444444444444444444444444444444444",),
            ], node.keys)
        self.assertEqual(1, node.offset)

    def assertFlattened(self, expected, key, value, refs):
        """Flatten one node via the active serializer and check the output."""
        flat_key, flat_line = self.parse_btree._flatten_node(
            (None, key, value, refs), bool(refs))
        self.assertEqual(b'\x00'.join(key), flat_key)
        self.assertEqual(expected, flat_line)

    def test__flatten_node(self):
        """_flatten_node serializes keys, refs and values into one line."""
        self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', [])
        self.assertFlattened(b'key\0tuple\0\0value str\n',
                             (b'key', b'tuple'), b'value str', [])
        self.assertFlattened(b'key\0tuple\0triple\0\0value str\n',
                             (b'key', b'tuple', b'triple'), b'value str', [])
        self.assertFlattened(b'k\0t\0s\0ref\0value str\n',
                             (b'k', b't', b's'), b'value str', [[(b'ref',)]])
        self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n',
                             (b'key', b'tuple'), b'value str',
                             [[(b'ref', b'key')]])
        self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n",
                             (b'00', b'00'), b'value:0',
                             ((), ((b'00', b'ref00'),)))
        self.assertFlattened(
            b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
            (b'00', b'11'), b'value:1',
            (((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01'))))
        self.assertFlattened(
            b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
            (b'11', b'33'), b'value:3',
            (((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22'))))
        self.assertFlattened(
            b"11\x0044\x00\t11\x00ref00\x00value:4\n",
            (b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),)))
class TestCompiledBtree(tests.TestCase):
    """Report the availability of the compiled btree serializer."""

    def test_exists(self):
        # This is just to let the user know if they don't have the feature
        # available
        self.requireFeature(compiled_btreeparser_feature)
class TestMultiBisectRight(tests.TestCase):
    """Tests for BTreeGraphIndex._multi_bisect_right.

    The helper takes sorted search keys and sorted fixed keys, and returns
    a list of (insertion_offset, [search_keys]) groups.
    """

    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
        """Check that bisecting search_keys into fixed_keys yields offsets."""
        self.assertEqual(offsets,
                         btree_index.BTreeGraphIndex._multi_bisect_right(
                             search_keys, fixed_keys))

    def test_after(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])

    def test_before(self):
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])

    def test_exact(self):
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])],
                                    ['a', 'b'], ['a', 'b'])
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
                                    ['a', 'c'], ['a', 'b', 'c'])

    def test_inbetween(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
                                    ['b', 'c', 'd', 'f', 'g'],
                                    ['a', 'e', 'h'])

    def test_mixed(self):
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
                                     (4, ['g', 'h'])],
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
                                    ['c', 'd', 'f', 'g'])
class TestExpandOffsets(tests.TestCase):
1368
def make_index(self, size, recommended_pages=None):
1369
"""Make an index with a generic size.
1371
This doesn't actually create anything on disk, it just primes a
1372
BTreeGraphIndex with the recommended information.
1374
index = btree_index.BTreeGraphIndex(
1375
transport.get_transport_from_url('memory:///'),
1376
'test-index', size=size)
1377
if recommended_pages is not None:
1378
index._recommended_pages = recommended_pages
1381
def set_cached_offsets(self, index, cached_offsets):
1382
"""Monkeypatch to give a canned answer for _get_offsets_for...()."""
1383
def _get_offsets_to_cached_pages():
1384
cached = set(cached_offsets)
1386
index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1388
def prepare_index(self, index, node_ref_lists, key_length, key_count,
1389
row_lengths, cached_offsets):
1390
"""Setup the BTreeGraphIndex with some pre-canned information."""
1391
index.node_ref_lists = node_ref_lists
1392
index._key_length = key_length
1393
index._key_count = key_count
1394
index._row_lengths = row_lengths
1395
index._compute_row_offsets()
1396
index._root_node = btree_index._InternalNode(b'internal\noffset=0\n')
1397
self.set_cached_offsets(index, cached_offsets)
1399
def make_100_node_index(self):
1400
index = self.make_index(4096*100, 6)
1401
# Consider we've already made a single request at the middle
1402
self.prepare_index(index, node_ref_lists=0, key_length=1,
1403
key_count=1000, row_lengths=[1, 99],
1404
cached_offsets=[0, 50])
1407
def make_1000_node_index(self):
1408
index = self.make_index(4096*1000, 6)
1409
# Pretend we've already made a single request in the middle
1410
self.prepare_index(index, node_ref_lists=0, key_length=1,
1411
key_count=90000, row_lengths=[1, 9, 990],
1412
cached_offsets=[0, 5, 500])
1415
def assertNumPages(self, expected_pages, index, size):
1417
self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1419
def assertExpandOffsets(self, expected, index, offsets):
1420
self.assertEqual(expected, index._expand_offsets(offsets),
1421
'We did not get the expected value after expanding'
1424
def test_default_recommended_pages(self):
1425
index = self.make_index(None)
1426
# local transport recommends 4096 byte reads, which is 1 page
1427
self.assertEqual(1, index._recommended_pages)
1429
def test__compute_total_pages_in_index(self):
1430
index = self.make_index(None)
1431
self.assertNumPages(1, index, 1024)
1432
self.assertNumPages(1, index, 4095)
1433
self.assertNumPages(1, index, 4096)
1434
self.assertNumPages(2, index, 4097)
1435
self.assertNumPages(2, index, 8192)
1436
self.assertNumPages(76, index, 4096*75 + 10)
1438
def test__find_layer_start_and_stop(self):
1439
index = self.make_1000_node_index()
1440
self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1441
self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1442
self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1443
self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1444
self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1445
self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1447
def test_unknown_size(self):
1448
# We should not expand if we don't know the file size
1449
index = self.make_index(None, 10)
1450
self.assertExpandOffsets([0], index, [0])
1451
self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1453
def test_more_than_recommended(self):
1454
index = self.make_index(4096*100, 2)
1455
self.assertExpandOffsets([1, 10], index, [1, 10])
1456
self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1458
def test_read_all_from_root(self):
1459
index = self.make_index(4096*10, 20)
1460
self.assertExpandOffsets(list(range(10)), index, [0])
1462
def test_read_all_when_cached(self):
1463
# We've read enough that we can grab all the rest in a single request
1464
index = self.make_index(4096*10, 5)
1465
self.prepare_index(index, node_ref_lists=0, key_length=1,
1466
key_count=1000, row_lengths=[1, 9],
1467
cached_offsets=[0, 1, 2, 5, 6])
1468
# It should fill the remaining nodes, regardless of the one requested
1469
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1470
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1471
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1473
def test_no_root_node(self):
1474
index = self.make_index(4096*10, 5)
1475
self.assertExpandOffsets([0], index, [0])
1477
def test_include_neighbors(self):
1478
index = self.make_100_node_index()
1479
# We expand in both directions, until we have at least 'recommended'
1481
self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1482
self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1483
# If we hit an 'edge' we continue in the other direction
1484
self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1485
self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1487
# Requesting many nodes will expand all locations equally
1488
self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1489
self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1492
def test_stop_at_cached(self):
1493
index = self.make_100_node_index()
1494
self.set_cached_offsets(index, [0, 10, 19])
1495
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1496
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1497
self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1498
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1499
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1500
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1502
def test_cannot_fully_expand(self):
1503
index = self.make_100_node_index()
1504
self.set_cached_offsets(index, [0, 10, 12])
1505
# We don't go into an endless loop if we are bound by cached nodes
1506
self.assertExpandOffsets([11], index, [11])
1508
def test_overlap(self):
1509
index = self.make_100_node_index()
1510
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1511
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1513
def test_stay_within_layer(self):
1514
index = self.make_1000_node_index()
1515
# When expanding a request, we won't read nodes from the next layer
1516
self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1517
self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1518
self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1519
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1520
self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1522
self.set_cached_offsets(index, [0, 4, 12])
1523
self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1524
self.assertExpandOffsets([10, 11], index, [11])
1526
def test_small_requests_unexpanded(self):
1527
index = self.make_100_node_index()
1528
self.set_cached_offsets(index, [0])
1529
self.assertExpandOffsets([1], index, [1])
1530
self.assertExpandOffsets([50], index, [50])
1531
# If we request more than one node, then we'll expand
1532
self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1534
# The first pass does not expand
1535
index = self.make_1000_node_index()
1536
self.set_cached_offsets(index, [0])
1537
self.assertExpandOffsets([1], index, [1])
1538
self.set_cached_offsets(index, [0, 1])
1539
self.assertExpandOffsets([100], index, [100])
1540
self.set_cached_offsets(index, [0, 1, 100])
1541
# But after the first depth, we will expand
1542
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1543
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1544
self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1545
self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,