1
# Copyright (C) 2008-2012, 2016 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
"""Tests for btree indices."""
36
TestCaseWithTransport,
44
load_tests = scenarios.load_tests_apply_scenarios
47
def btreeparser_scenarios():
48
import breezy.bzr._btree_serializer_py as py_module
49
scenarios = [('python', {'parse_btree': py_module})]
50
if compiled_btreeparser_feature.available():
51
scenarios.append(('C',
52
{'parse_btree': compiled_btreeparser_feature.module}))
56
compiled_btreeparser_feature = features.ModuleAvailableFeature(
57
'breezy.bzr._btree_serializer_pyx')
60
class BTreeTestCase(TestCaseWithTransport):
61
# test names here are suffixed by the key length and reference list count
65
super(BTreeTestCase, self).setUp()
66
self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
68
def make_nodes(self, count, key_elements, reference_lists):
69
"""Generate count*key_elements sample nodes."""
70
def _pos_to_key(pos, lead=b""):
71
return (lead + (b"%d" % pos) * 40,)
73
for prefix_pos in range(key_elements):
75
prefix = _pos_to_key(prefix_pos)
78
for pos in range(count):
79
# TODO: This creates odd keys. When count == 100,000, it
80
# creates a 240 byte key
81
key = prefix + _pos_to_key(pos)
82
value = b"value:%d" % pos
84
# generate some references
86
for list_pos in range(reference_lists):
87
# as many keys in each list as its index + the key depth
88
# mod 2 - this generates both 0 length lists and
89
# ones slightly longer than the number of lists.
90
# It also ensures we have non homogeneous lists.
92
for ref_pos in range(list_pos + pos % 2):
94
# refer to a nearby key
95
refs[-1].append(prefix
96
+ _pos_to_key(pos - 1, b"ref"))
98
# serial of this ref in the ref list
99
refs[-1].append(prefix
100
+ _pos_to_key(ref_pos, b"ref"))
101
refs[-1] = tuple(refs[-1])
105
keys.append((key, value, refs))
108
def shrink_page_size(self):
109
"""Shrink the default page size so that less fits in a page."""
110
self.overrideAttr(btree_index, '_PAGE_SIZE')
111
btree_index._PAGE_SIZE = 2048
113
def assertEqualApproxCompressed(self, expected, actual, slop=6):
114
"""Check a count of compressed bytes is approximately as expected
116
Relying on compressed length being stable even with fixed inputs is
117
slightly bogus, but zlib is stable enough that this mostly works.
119
if not expected - slop < actual < expected + slop:
120
self.fail("Expected around %d bytes compressed but got %d" %
124
class TestBTreeBuilder(BTreeTestCase):
126
def test_clear_cache(self):
127
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
128
# This is a no-op, but we need the api to be consistent with other
129
# BTreeGraphIndex apis.
130
builder.clear_cache()
132
def test_empty_1_0(self):
133
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
134
# NamedTemporaryFile dies on builder.finish().read(). weird.
135
temp_file = builder.finish()
136
content = temp_file.read()
139
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
143
def test_empty_2_1(self):
144
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
145
# NamedTemporaryFile dies on builder.finish().read(). weird.
146
temp_file = builder.finish()
147
content = temp_file.read()
150
b"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
154
def test_root_leaf_1_0(self):
155
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
156
nodes = self.make_nodes(5, 1, 0)
158
builder.add_node(*node)
159
# NamedTemporaryFile dies on builder.finish().read(). weird.
160
temp_file = builder.finish()
161
content = temp_file.read()
163
self.assertEqual(131, len(content))
165
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
168
node_content = content[73:]
169
node_bytes = zlib.decompress(node_content)
170
expected_node = (b"type=leaf\n"
171
b"0000000000000000000000000000000000000000\x00\x00value:0\n"
172
b"1111111111111111111111111111111111111111\x00\x00value:1\n"
173
b"2222222222222222222222222222222222222222\x00\x00value:2\n"
174
b"3333333333333333333333333333333333333333\x00\x00value:3\n"
175
b"4444444444444444444444444444444444444444\x00\x00value:4\n")
176
self.assertEqual(expected_node, node_bytes)
178
def test_root_leaf_2_2(self):
179
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
180
nodes = self.make_nodes(5, 2, 2)
182
builder.add_node(*node)
183
# NamedTemporaryFile dies on builder.finish().read(). weird.
184
temp_file = builder.finish()
185
content = temp_file.read()
187
self.assertEqual(238, len(content))
189
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
192
node_content = content[74:]
193
node_bytes = zlib.decompress(node_content)
196
b"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
197
b"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
198
b"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
199
b"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
200
b"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
201
b"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
202
b"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
203
b"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
204
b"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
205
b"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
208
self.assertEqual(expected_node, node_bytes)
210
def test_2_leaves_1_0(self):
211
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
212
nodes = self.make_nodes(400, 1, 0)
214
builder.add_node(*node)
215
# NamedTemporaryFile dies on builder.finish().read(). weird.
216
temp_file = builder.finish()
217
content = temp_file.read()
219
self.assertEqualApproxCompressed(9283, len(content))
221
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
222
b"row_lengths=1,2\n",
224
root = content[77:4096]
225
leaf1 = content[4096:8192]
226
leaf2 = content[8192:]
227
root_bytes = zlib.decompress(root)
231
) + (b"307" * 40) + b"\n"
232
self.assertEqual(expected_root, root_bytes)
233
# We already know serialisation works for leaves, check key selection:
234
leaf1_bytes = zlib.decompress(leaf1)
235
sorted_node_keys = sorted(node[0] for node in nodes)
236
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
237
self.assertEqual(231, len(node))
238
self.assertEqual(sorted_node_keys[:231], node.all_keys())
239
leaf2_bytes = zlib.decompress(leaf2)
240
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
241
self.assertEqual(400 - 231, len(node))
242
self.assertEqual(sorted_node_keys[231:], node.all_keys())
244
def test_last_page_rounded_1_layer(self):
245
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
246
nodes = self.make_nodes(10, 1, 0)
248
builder.add_node(*node)
249
# NamedTemporaryFile dies on builder.finish().read(). weird.
250
temp_file = builder.finish()
251
content = temp_file.read()
253
self.assertEqualApproxCompressed(155, len(content))
255
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
258
# Check thelast page is well formed
260
leaf2_bytes = zlib.decompress(leaf2)
261
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
262
self.assertEqual(10, len(node))
263
sorted_node_keys = sorted(node[0] for node in nodes)
264
self.assertEqual(sorted_node_keys, node.all_keys())
266
def test_last_page_not_rounded_2_layer(self):
267
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
268
nodes = self.make_nodes(400, 1, 0)
270
builder.add_node(*node)
271
# NamedTemporaryFile dies on builder.finish().read(). weird.
272
temp_file = builder.finish()
273
content = temp_file.read()
275
self.assertEqualApproxCompressed(9283, len(content))
277
b"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
278
b"row_lengths=1,2\n",
280
# Check the last page is well formed
281
leaf2 = content[8192:]
282
leaf2_bytes = zlib.decompress(leaf2)
283
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
284
self.assertEqual(400 - 231, len(node))
285
sorted_node_keys = sorted(node[0] for node in nodes)
286
self.assertEqual(sorted_node_keys[231:], node.all_keys())
288
def test_three_level_tree_details(self):
289
# The left most pointer in the second internal node in a row should
290
# pointer to the second node that the internal node is for, _not_
291
# the first, otherwise the first node overlaps with the last node of
292
# the prior internal node on that row.
293
self.shrink_page_size()
294
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
295
# 40K nodes is enough to create a two internal nodes on the second
296
# level, with a 2K page size
297
nodes = self.make_nodes(20000, 2, 2)
300
builder.add_node(*node)
301
t = transport.get_transport_from_url('trace+' + self.get_url(''))
302
size = t.put_file('index', self.time(builder.finish))
304
index = btree_index.BTreeGraphIndex(t, 'index', size)
305
# Seed the metadata, we're using internal calls now.
307
self.assertEqual(3, len(index._row_lengths),
308
"Not enough rows: %r" % index._row_lengths)
309
self.assertEqual(4, len(index._row_offsets))
310
self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
311
internal_nodes = index._get_internal_nodes([0, 1, 2])
312
root_node = internal_nodes[0]
313
internal_node1 = internal_nodes[1]
314
internal_node2 = internal_nodes[2]
315
# The left most node node2 points at should be one after the right most
316
# node pointed at by node1.
317
self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
318
# The left most key of the second node pointed at by internal_node2
319
# should be its first key. We can check this by looking for its first key
320
# in the second node it points at
321
pos = index._row_offsets[2] + internal_node2.offset + 1
322
leaf = index._get_leaf_nodes([pos])[pos]
323
self.assertTrue(internal_node2.keys[0] in leaf)
325
def test_2_leaves_2_2(self):
326
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
327
nodes = self.make_nodes(100, 2, 2)
329
builder.add_node(*node)
330
# NamedTemporaryFile dies on builder.finish().read(). weird.
331
temp_file = builder.finish()
332
content = temp_file.read()
334
self.assertEqualApproxCompressed(12643, len(content))
336
b"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
337
b"row_lengths=1,3\n",
339
root = content[77:4096]
340
leaf1 = content[4096:8192]
341
leaf2 = content[8192:12288]
342
leaf3 = content[12288:]
343
root_bytes = zlib.decompress(root)
347
(b"0" * 40) + b"\x00" + (b"91" * 40) + b"\n" +
348
(b"1" * 40) + b"\x00" + (b"81" * 40) + b"\n"
350
self.assertEqual(expected_root, root_bytes)
351
# We assume the other leaf nodes have been written correctly - layering
354
def test_spill_index_stress_1_1(self):
355
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
356
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
357
builder.add_node(*nodes[0])
358
# Test the parts of the index that take up memory are doing so
360
self.assertEqual(1, len(builder._nodes))
361
self.assertIs(None, builder._nodes_by_key)
362
builder.add_node(*nodes[1])
363
self.assertEqual(0, len(builder._nodes))
364
self.assertIs(None, builder._nodes_by_key)
365
self.assertEqual(1, len(builder._backing_indices))
366
self.assertEqual(2, builder._backing_indices[0].key_count())
368
builder.add_node(*nodes[2])
369
self.assertEqual(1, len(builder._nodes))
370
self.assertIs(None, builder._nodes_by_key)
371
# And spills to a second backing index combing all
372
builder.add_node(*nodes[3])
373
self.assertEqual(0, len(builder._nodes))
374
self.assertIs(None, builder._nodes_by_key)
375
self.assertEqual(2, len(builder._backing_indices))
376
self.assertEqual(None, builder._backing_indices[0])
377
self.assertEqual(4, builder._backing_indices[1].key_count())
378
# The next spills to the 2-len slot
379
builder.add_node(*nodes[4])
380
builder.add_node(*nodes[5])
381
self.assertEqual(0, len(builder._nodes))
382
self.assertIs(None, builder._nodes_by_key)
383
self.assertEqual(2, len(builder._backing_indices))
384
self.assertEqual(2, builder._backing_indices[0].key_count())
385
self.assertEqual(4, builder._backing_indices[1].key_count())
386
# Next spill combines
387
builder.add_node(*nodes[6])
388
builder.add_node(*nodes[7])
389
self.assertEqual(3, len(builder._backing_indices))
390
self.assertEqual(None, builder._backing_indices[0])
391
self.assertEqual(None, builder._backing_indices[1])
392
self.assertEqual(8, builder._backing_indices[2].key_count())
393
# And so forth - counting up in binary.
394
builder.add_node(*nodes[8])
395
builder.add_node(*nodes[9])
396
self.assertEqual(3, len(builder._backing_indices))
397
self.assertEqual(2, builder._backing_indices[0].key_count())
398
self.assertEqual(None, builder._backing_indices[1])
399
self.assertEqual(8, builder._backing_indices[2].key_count())
400
builder.add_node(*nodes[10])
401
builder.add_node(*nodes[11])
402
self.assertEqual(3, len(builder._backing_indices))
403
self.assertEqual(None, builder._backing_indices[0])
404
self.assertEqual(4, builder._backing_indices[1].key_count())
405
self.assertEqual(8, builder._backing_indices[2].key_count())
406
builder.add_node(*nodes[12])
407
# Test that memory and disk are both used for query methods; and that
408
# None is skipped over happily.
409
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
410
list(builder.iter_all_entries()))
411
# Two nodes - one memory one disk
412
self.assertEqual({(builder,) + node for node in nodes[11:13]},
413
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
414
self.assertEqual(13, builder.key_count())
415
self.assertEqual({(builder,) + node for node in nodes[11:13]},
416
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
417
builder.add_node(*nodes[13])
418
self.assertEqual(3, len(builder._backing_indices))
419
self.assertEqual(2, builder._backing_indices[0].key_count())
420
self.assertEqual(4, builder._backing_indices[1].key_count())
421
self.assertEqual(8, builder._backing_indices[2].key_count())
422
builder.add_node(*nodes[14])
423
builder.add_node(*nodes[15])
424
self.assertEqual(4, len(builder._backing_indices))
425
self.assertEqual(None, builder._backing_indices[0])
426
self.assertEqual(None, builder._backing_indices[1])
427
self.assertEqual(None, builder._backing_indices[2])
428
self.assertEqual(16, builder._backing_indices[3].key_count())
429
# Now finish, and check we got a correctly ordered tree
430
t = self.get_transport('')
431
size = t.put_file('index', builder.finish())
432
index = btree_index.BTreeGraphIndex(t, 'index', size)
433
nodes = list(index.iter_all_entries())
434
self.assertEqual(sorted(nodes), nodes)
435
self.assertEqual(16, len(nodes))
437
def test_spill_index_stress_1_1_no_combine(self):
438
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
439
builder.set_optimize(for_size=False, combine_backing_indices=False)
440
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
441
builder.add_node(*nodes[0])
442
# Test the parts of the index that take up memory are doing so
444
self.assertEqual(1, len(builder._nodes))
445
self.assertIs(None, builder._nodes_by_key)
446
builder.add_node(*nodes[1])
447
self.assertEqual(0, len(builder._nodes))
448
self.assertIs(None, builder._nodes_by_key)
449
self.assertEqual(1, len(builder._backing_indices))
450
self.assertEqual(2, builder._backing_indices[0].key_count())
452
builder.add_node(*nodes[2])
453
self.assertEqual(1, len(builder._nodes))
454
self.assertIs(None, builder._nodes_by_key)
455
# And spills to a second backing index but doesn't combine
456
builder.add_node(*nodes[3])
457
self.assertEqual(0, len(builder._nodes))
458
self.assertIs(None, builder._nodes_by_key)
459
self.assertEqual(2, len(builder._backing_indices))
460
for backing_index in builder._backing_indices:
461
self.assertEqual(2, backing_index.key_count())
462
# The next spills to the 3rd slot
463
builder.add_node(*nodes[4])
464
builder.add_node(*nodes[5])
465
self.assertEqual(0, len(builder._nodes))
466
self.assertIs(None, builder._nodes_by_key)
467
self.assertEqual(3, len(builder._backing_indices))
468
for backing_index in builder._backing_indices:
469
self.assertEqual(2, backing_index.key_count())
470
# Now spill a few more, and check that we don't combine
471
builder.add_node(*nodes[6])
472
builder.add_node(*nodes[7])
473
builder.add_node(*nodes[8])
474
builder.add_node(*nodes[9])
475
builder.add_node(*nodes[10])
476
builder.add_node(*nodes[11])
477
builder.add_node(*nodes[12])
478
self.assertEqual(6, len(builder._backing_indices))
479
for backing_index in builder._backing_indices:
480
self.assertEqual(2, backing_index.key_count())
481
# Test that memory and disk are both used for query methods; and that
482
# None is skipped over happily.
483
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
484
list(builder.iter_all_entries()))
485
# Two nodes - one memory one disk
486
self.assertEqual({(builder,) + node for node in nodes[11:13]},
487
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
488
self.assertEqual(13, builder.key_count())
489
self.assertEqual({(builder,) + node for node in nodes[11:13]},
490
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
491
builder.add_node(*nodes[13])
492
builder.add_node(*nodes[14])
493
builder.add_node(*nodes[15])
494
self.assertEqual(8, len(builder._backing_indices))
495
for backing_index in builder._backing_indices:
496
self.assertEqual(2, backing_index.key_count())
497
# Now finish, and check we got a correctly ordered tree
498
transport = self.get_transport('')
499
size = transport.put_file('index', builder.finish())
500
index = btree_index.BTreeGraphIndex(transport, 'index', size)
501
nodes = list(index.iter_all_entries())
502
self.assertEqual(sorted(nodes), nodes)
503
self.assertEqual(16, len(nodes))
505
def test_set_optimize(self):
506
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
507
builder.set_optimize(for_size=True)
508
self.assertTrue(builder._optimize_for_size)
509
builder.set_optimize(for_size=False)
510
self.assertFalse(builder._optimize_for_size)
511
# test that we can set combine_backing_indices without effecting
514
builder._optimize_for_size = obj
515
builder.set_optimize(combine_backing_indices=False)
516
self.assertFalse(builder._combine_backing_indices)
517
self.assertIs(obj, builder._optimize_for_size)
518
builder.set_optimize(combine_backing_indices=True)
519
self.assertTrue(builder._combine_backing_indices)
520
self.assertIs(obj, builder._optimize_for_size)
522
def test_spill_index_stress_2_2(self):
523
# test that references and longer keys don't confuse things.
524
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
526
nodes = self.make_nodes(16, 2, 2)
527
builder.add_node(*nodes[0])
528
# Test the parts of the index that take up memory are doing so
530
self.assertEqual(1, len(builder._nodes))
531
self.assertIs(None, builder._nodes_by_key)
532
builder.add_node(*nodes[1])
533
self.assertEqual(0, len(builder._nodes))
534
self.assertIs(None, builder._nodes_by_key)
535
self.assertEqual(1, len(builder._backing_indices))
536
self.assertEqual(2, builder._backing_indices[0].key_count())
538
# Build up the nodes by key dict
539
old = dict(builder._get_nodes_by_key())
540
builder.add_node(*nodes[2])
541
self.assertEqual(1, len(builder._nodes))
542
self.assertIsNot(None, builder._nodes_by_key)
543
self.assertNotEqual({}, builder._nodes_by_key)
544
# We should have a new entry
545
self.assertNotEqual(old, builder._nodes_by_key)
546
# And spills to a second backing index combing all
547
builder.add_node(*nodes[3])
548
self.assertEqual(0, len(builder._nodes))
549
self.assertIs(None, builder._nodes_by_key)
550
self.assertEqual(2, len(builder._backing_indices))
551
self.assertEqual(None, builder._backing_indices[0])
552
self.assertEqual(4, builder._backing_indices[1].key_count())
553
# The next spills to the 2-len slot
554
builder.add_node(*nodes[4])
555
builder.add_node(*nodes[5])
556
self.assertEqual(0, len(builder._nodes))
557
self.assertIs(None, builder._nodes_by_key)
558
self.assertEqual(2, len(builder._backing_indices))
559
self.assertEqual(2, builder._backing_indices[0].key_count())
560
self.assertEqual(4, builder._backing_indices[1].key_count())
561
# Next spill combines
562
builder.add_node(*nodes[6])
563
builder.add_node(*nodes[7])
564
self.assertEqual(3, len(builder._backing_indices))
565
self.assertEqual(None, builder._backing_indices[0])
566
self.assertEqual(None, builder._backing_indices[1])
567
self.assertEqual(8, builder._backing_indices[2].key_count())
568
# And so forth - counting up in binary.
569
builder.add_node(*nodes[8])
570
builder.add_node(*nodes[9])
571
self.assertEqual(3, len(builder._backing_indices))
572
self.assertEqual(2, builder._backing_indices[0].key_count())
573
self.assertEqual(None, builder._backing_indices[1])
574
self.assertEqual(8, builder._backing_indices[2].key_count())
575
builder.add_node(*nodes[10])
576
builder.add_node(*nodes[11])
577
self.assertEqual(3, len(builder._backing_indices))
578
self.assertEqual(None, builder._backing_indices[0])
579
self.assertEqual(4, builder._backing_indices[1].key_count())
580
self.assertEqual(8, builder._backing_indices[2].key_count())
581
builder.add_node(*nodes[12])
582
# Test that memory and disk are both used for query methods; and that
583
# None is skipped over happily.
584
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
585
list(builder.iter_all_entries()))
586
# Two nodes - one memory one disk
587
self.assertEqual({(builder,) + node for node in nodes[11:13]},
588
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
589
self.assertEqual(13, builder.key_count())
590
self.assertEqual({(builder,) + node for node in nodes[11:13]},
591
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
592
builder.add_node(*nodes[13])
593
self.assertEqual(3, len(builder._backing_indices))
594
self.assertEqual(2, builder._backing_indices[0].key_count())
595
self.assertEqual(4, builder._backing_indices[1].key_count())
596
self.assertEqual(8, builder._backing_indices[2].key_count())
597
builder.add_node(*nodes[14])
598
builder.add_node(*nodes[15])
599
self.assertEqual(4, len(builder._backing_indices))
600
self.assertEqual(None, builder._backing_indices[0])
601
self.assertEqual(None, builder._backing_indices[1])
602
self.assertEqual(None, builder._backing_indices[2])
603
self.assertEqual(16, builder._backing_indices[3].key_count())
604
# Now finish, and check we got a correctly ordered tree
605
transport = self.get_transport('')
606
size = transport.put_file('index', builder.finish())
607
index = btree_index.BTreeGraphIndex(transport, 'index', size)
608
nodes = list(index.iter_all_entries())
609
self.assertEqual(sorted(nodes), nodes)
610
self.assertEqual(16, len(nodes))
612
def test_spill_index_duplicate_key_caught_on_finish(self):
613
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
614
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
615
builder.add_node(*nodes[0])
616
builder.add_node(*nodes[1])
617
builder.add_node(*nodes[0])
618
self.assertRaises(_mod_index.BadIndexDuplicateKey, builder.finish)
621
class TestBTreeIndex(BTreeTestCase):
623
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
624
builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
625
key_elements=key_elements)
626
for key, value, references in nodes:
627
builder.add_node(key, value, references)
628
stream = builder.finish()
629
trans = transport.get_transport_from_url('trace+' + self.get_url())
630
size = trans.put_file('index', stream)
631
return btree_index.BTreeGraphIndex(trans, 'index', size)
633
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
635
builder = btree_index.BTreeBuilder(key_elements=key_elements,
636
reference_lists=ref_lists)
637
builder.add_nodes(nodes)
638
transport = self.get_transport('')
639
# NamedTemporaryFile dies on builder.finish().read(). weird.
640
temp_file = builder.finish()
641
content = temp_file.read()
644
transport.put_bytes('index', (b' ' * offset) + content)
645
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
648
def test_clear_cache(self):
649
nodes = self.make_nodes(160, 2, 2)
650
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
651
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
652
self.assertEqual([1, 4], index._row_lengths)
653
self.assertIsNot(None, index._root_node)
654
internal_node_pre_clear = set(index._internal_node_cache)
655
self.assertTrue(len(index._leaf_node_cache) > 0)
657
# We don't touch _root_node or _internal_node_cache, both should be
658
# small, and can save a round trip or two
659
self.assertIsNot(None, index._root_node)
660
# NOTE: We don't want to affect the _internal_node_cache, as we expect
661
# it will be small, and if we ever do touch this index again, it
662
# will save round-trips. This assertion isn't very strong,
663
# becuase without a 3-level index, we don't have any internal
665
self.assertEqual(internal_node_pre_clear,
666
set(index._internal_node_cache))
667
self.assertEqual(0, len(index._leaf_node_cache))
669
def test_trivial_constructor(self):
670
t = transport.get_transport_from_url('trace+' + self.get_url(''))
671
index = btree_index.BTreeGraphIndex(t, 'index', None)
672
# Checks the page size at load, but that isn't logged yet.
673
self.assertEqual([], t._activity)
675
def test_with_size_constructor(self):
676
t = transport.get_transport_from_url('trace+' + self.get_url(''))
677
index = btree_index.BTreeGraphIndex(t, 'index', 1)
678
# Checks the page size at load, but that isn't logged yet.
679
self.assertEqual([], t._activity)
681
def test_empty_key_count_no_size(self):
682
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
683
t = transport.get_transport_from_url('trace+' + self.get_url(''))
684
t.put_file('index', builder.finish())
685
index = btree_index.BTreeGraphIndex(t, 'index', None)
687
self.assertEqual([], t._activity)
688
self.assertEqual(0, index.key_count())
689
# The entire index should have been requested (as we generally have the
690
# size available, and doing many small readvs is inappropriate).
691
# We can't tell how much was actually read here, but - check the code.
692
self.assertEqual([('get', 'index')], t._activity)
694
def test_empty_key_count(self):
695
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
696
t = transport.get_transport_from_url('trace+' + self.get_url(''))
697
size = t.put_file('index', builder.finish())
698
self.assertEqual(72, size)
699
index = btree_index.BTreeGraphIndex(t, 'index', size)
701
self.assertEqual([], t._activity)
702
self.assertEqual(0, index.key_count())
703
# The entire index should have been read, as 4K > size
704
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
707
def test_non_empty_key_count_2_2(self):
708
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
709
nodes = self.make_nodes(35, 2, 2)
711
builder.add_node(*node)
712
t = transport.get_transport_from_url('trace+' + self.get_url(''))
713
size = t.put_file('index', builder.finish())
714
index = btree_index.BTreeGraphIndex(t, 'index', size)
716
self.assertEqual([], t._activity)
717
self.assertEqual(70, index.key_count())
718
# The entire index should have been read, as it is one page long.
719
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
721
self.assertEqualApproxCompressed(1173, size)
723
def test_with_offset_no_size(self):
724
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
726
nodes=self.make_nodes(200, 1, 1))
727
index._size = None # throw away the size info
728
self.assertEqual(200, index.key_count())
730
def test_with_small_offset(self):
731
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
733
nodes=self.make_nodes(200, 1, 1))
734
self.assertEqual(200, index.key_count())
736
def test_with_large_offset(self):
737
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
739
nodes=self.make_nodes(200, 1, 1))
740
self.assertEqual(200, index.key_count())
742
def test__read_nodes_no_size_one_page_reads_once(self):
743
self.make_index(nodes=[((b'key',), b'value', ())])
744
trans = transport.get_transport_from_url('trace+' + self.get_url())
745
index = btree_index.BTreeGraphIndex(trans, 'index', None)
746
del trans._activity[:]
747
nodes = dict(index._read_nodes([0]))
748
self.assertEqual({0}, set(nodes))
750
self.assertEqual([(b'key',)], node.all_keys())
751
self.assertEqual([('get', 'index')], trans._activity)
753
def test__read_nodes_no_size_multiple_pages(self):
754
index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
756
num_pages = index._row_offsets[-1]
757
# Reopen with a traced transport and no size
758
trans = transport.get_transport_from_url('trace+' + self.get_url())
759
index = btree_index.BTreeGraphIndex(trans, 'index', None)
760
del trans._activity[:]
761
nodes = dict(index._read_nodes([0]))
762
self.assertEqual(list(range(num_pages)), sorted(nodes))
764
def test_2_levels_key_count_2_2(self):
765
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
766
nodes = self.make_nodes(160, 2, 2)
768
builder.add_node(*node)
769
t = transport.get_transport_from_url('trace+' + self.get_url(''))
770
size = t.put_file('index', builder.finish())
771
self.assertEqualApproxCompressed(17692, size)
772
index = btree_index.BTreeGraphIndex(t, 'index', size)
774
self.assertEqual([], t._activity)
775
self.assertEqual(320, index.key_count())
776
# The entire index should not have been read.
777
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
780
def test_validate_one_page(self):
781
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
782
nodes = self.make_nodes(45, 2, 2)
784
builder.add_node(*node)
785
t = transport.get_transport_from_url('trace+' + self.get_url(''))
786
size = t.put_file('index', builder.finish())
787
index = btree_index.BTreeGraphIndex(t, 'index', size)
789
self.assertEqual([], t._activity)
791
# The entire index should have been read linearly.
792
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
794
self.assertEqualApproxCompressed(1488, size)
796
def test_validate_two_pages(self):
797
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
798
nodes = self.make_nodes(80, 2, 2)
800
builder.add_node(*node)
801
t = transport.get_transport_from_url('trace+' + self.get_url(''))
802
size = t.put_file('index', builder.finish())
803
# Root page, 2 leaf pages
804
self.assertEqualApproxCompressed(9339, size)
805
index = btree_index.BTreeGraphIndex(t, 'index', size)
807
self.assertEqual([], t._activity)
809
rem = size - 8192 # Number of remaining bytes after second block
810
# The entire index should have been read linearly.
812
[('readv', 'index', [(0, 4096)], False, None),
813
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
815
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
816
# node and make validate find them.
818
def test_eq_ne(self):
819
# two indices are equal when constructed with the same parameters:
820
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
821
t2 = self.get_transport()
823
btree_index.BTreeGraphIndex(t1, 'index', None)
824
== btree_index.BTreeGraphIndex(t1, 'index', None))
826
btree_index.BTreeGraphIndex(t1, 'index', 20)
827
== btree_index.BTreeGraphIndex(t1, 'index', 20))
829
btree_index.BTreeGraphIndex(t1, 'index', 20)
830
== btree_index.BTreeGraphIndex(t2, 'index', 20))
832
btree_index.BTreeGraphIndex(t1, 'inde1', 20)
833
== btree_index.BTreeGraphIndex(t1, 'inde2', 20))
835
btree_index.BTreeGraphIndex(t1, 'index', 10)
836
== btree_index.BTreeGraphIndex(t1, 'index', 20))
838
btree_index.BTreeGraphIndex(t1, 'index', None)
839
!= btree_index.BTreeGraphIndex(t1, 'index', None))
841
btree_index.BTreeGraphIndex(t1, 'index', 20)
842
!= btree_index.BTreeGraphIndex(t1, 'index', 20))
844
btree_index.BTreeGraphIndex(t1, 'index', 20)
845
!= btree_index.BTreeGraphIndex(t2, 'index', 20))
847
btree_index.BTreeGraphIndex(t1, 'inde1', 20)
848
!= btree_index.BTreeGraphIndex(t1, 'inde2', 20))
850
btree_index.BTreeGraphIndex(t1, 'index', 10)
851
!= btree_index.BTreeGraphIndex(t1, 'index', 20))
853
def test_key_too_big(self):
854
# the size that matters here is the _compressed_ size of the key, so we can't
855
# do a simple character repeat.
856
bigKey = b''.join(b'%d' % n for n in range(btree_index._PAGE_SIZE))
857
self.assertRaises(_mod_index.BadIndexKey,
859
nodes=[((bigKey,), b'value', ())])
861
def test_iter_all_only_root_no_size(self):
862
self.make_index(nodes=[((b'key',), b'value', ())])
863
t = transport.get_transport_from_url('trace+' + self.get_url(''))
864
index = btree_index.BTreeGraphIndex(t, 'index', None)
866
self.assertEqual([((b'key',), b'value')],
867
[x[1:] for x in index.iter_all_entries()])
868
self.assertEqual([('get', 'index')], t._activity)
870
def test_iter_all_entries_reads(self):
871
# iterating all entries reads the header, then does a linear
873
self.shrink_page_size()
874
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
875
# 20k nodes is enough to create a two internal nodes on the second
876
# level, with a 2K page size
877
nodes = self.make_nodes(10000, 2, 2)
879
builder.add_node(*node)
880
t = transport.get_transport_from_url('trace+' + self.get_url(''))
881
size = t.put_file('index', builder.finish())
882
page_size = btree_index._PAGE_SIZE
884
index = btree_index.BTreeGraphIndex(t, 'index', size)
886
self.assertEqual([], t._activity)
887
found_nodes = self.time(list, index.iter_all_entries())
889
for node in found_nodes:
890
self.assertTrue(node[0] is index)
891
bare_nodes.append(node[1:])
892
self.assertEqual(3, len(index._row_lengths),
893
"Not enough rows: %r" % index._row_lengths)
894
# Should be as long as the nodes we supplied
895
self.assertEqual(20000, len(found_nodes))
896
# Should have the same content
897
self.assertEqual(set(nodes), set(bare_nodes))
898
# Should have done linear scan IO up the index, ignoring
899
# the internal nodes:
900
# The entire index should have been read
901
total_pages = sum(index._row_lengths)
902
self.assertEqual(total_pages, index._row_offsets[-1])
903
self.assertEqualApproxCompressed(1303220, size)
904
# The start of the leaves
905
first_byte = index._row_offsets[-2] * page_size
907
for offset in range(first_byte, size, page_size):
908
readv_request.append((offset, page_size))
909
# The last page is truncated
910
readv_request[-1] = (readv_request[-1][0], size % page_size)
911
expected = [('readv', 'index', [(0, page_size)], False, None),
912
('readv', 'index', readv_request, False, None)]
913
if expected != t._activity:
914
self.assertEqualDiff(pprint.pformat(expected),
915
pprint.pformat(t._activity))
917
def test_iter_entries_references_2_refs_resolved(self):
918
# iterating some entries reads just the pages needed. For now, to
919
# get it working and start measuring, only 4K pages are read.
920
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
921
# 80 nodes is enough to create a two-level index.
922
nodes = self.make_nodes(160, 2, 2)
924
builder.add_node(*node)
925
t = transport.get_transport_from_url('trace+' + self.get_url(''))
926
size = t.put_file('index', builder.finish())
928
index = btree_index.BTreeGraphIndex(t, 'index', size)
930
self.assertEqual([], t._activity)
932
found_nodes = list(index.iter_entries([nodes[30][0]]))
934
for node in found_nodes:
935
self.assertTrue(node[0] is index)
936
bare_nodes.append(node[1:])
937
# Should be as long as the nodes we supplied
938
self.assertEqual(1, len(found_nodes))
939
# Should have the same content
940
self.assertEqual(nodes[30], bare_nodes[0])
941
# Should have read the root node, then one leaf page:
942
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
943
('readv', 'index', [(8192, 4096), ], False, None)],
946
def test_iter_key_prefix_1_element_key_None(self):
947
index = self.make_index()
948
self.assertRaises(_mod_index.BadIndexKey, list,
949
index.iter_entries_prefix([(None, )]))
951
def test_iter_key_prefix_wrong_length(self):
952
index = self.make_index()
953
self.assertRaises(_mod_index.BadIndexKey, list,
954
index.iter_entries_prefix([(b'foo', None)]))
955
index = self.make_index(key_elements=2)
956
self.assertRaises(_mod_index.BadIndexKey, list,
957
index.iter_entries_prefix([(b'foo', )]))
958
self.assertRaises(_mod_index.BadIndexKey, list,
959
index.iter_entries_prefix([(b'foo', None, None)]))
961
def test_iter_key_prefix_1_key_element_no_refs(self):
962
index = self.make_index(nodes=[
963
((b'name', ), b'data', ()),
964
((b'ref', ), b'refdata', ())])
965
self.assertEqual({(index, (b'name', ), b'data'),
966
(index, (b'ref', ), b'refdata')},
967
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
969
def test_iter_key_prefix_1_key_element_refs(self):
970
index = self.make_index(1, nodes=[
971
((b'name', ), b'data', ([(b'ref', )], )),
972
((b'ref', ), b'refdata', ([], ))])
973
self.assertEqual({(index, (b'name', ), b'data', (((b'ref',),),)),
974
(index, (b'ref', ), b'refdata', ((), ))},
975
set(index.iter_entries_prefix([(b'name', ), (b'ref', )])))
977
def test_iter_key_prefix_2_key_element_no_refs(self):
978
index = self.make_index(key_elements=2, nodes=[
979
((b'name', b'fin1'), b'data', ()),
980
((b'name', b'fin2'), b'beta', ()),
981
((b'ref', b'erence'), b'refdata', ())])
982
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
983
(index, (b'ref', b'erence'), b'refdata')},
984
set(index.iter_entries_prefix(
985
[(b'name', b'fin1'), (b'ref', b'erence')])))
986
self.assertEqual({(index, (b'name', b'fin1'), b'data'),
987
(index, (b'name', b'fin2'), b'beta')},
988
set(index.iter_entries_prefix([(b'name', None)])))
990
def test_iter_key_prefix_2_key_element_refs(self):
991
index = self.make_index(1, key_elements=2, nodes=[
992
((b'name', b'fin1'), b'data', ([(b'ref', b'erence')], )),
993
((b'name', b'fin2'), b'beta', ([], )),
994
((b'ref', b'erence'), b'refdata', ([], ))])
996
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
997
(index, (b'ref', b'erence'), b'refdata', ((), ))},
998
set(index.iter_entries_prefix(
999
[(b'name', b'fin1'), (b'ref', b'erence')])))
1001
{(index, (b'name', b'fin1'), b'data', (((b'ref', b'erence'),),)),
1002
(index, (b'name', b'fin2'), b'beta', ((), ))},
1003
set(index.iter_entries_prefix([(b'name', None)])))
1005
# XXX: external_references tests are duplicated in test_index. We
1006
# probably should have per_graph_index tests...
1007
def test_external_references_no_refs(self):
1008
index = self.make_index(ref_lists=0, nodes=[])
1009
self.assertRaises(ValueError, index.external_references, 0)
1011
def test_external_references_no_results(self):
1012
index = self.make_index(ref_lists=1, nodes=[
1013
((b'key',), b'value', ([],))])
1014
self.assertEqual(set(), index.external_references(0))
1016
def test_external_references_missing_ref(self):
1017
missing_key = (b'missing',)
1018
index = self.make_index(ref_lists=1, nodes=[
1019
((b'key',), b'value', ([missing_key],))])
1020
self.assertEqual({missing_key}, index.external_references(0))
1022
def test_external_references_multiple_ref_lists(self):
1023
missing_key = (b'missing',)
1024
index = self.make_index(ref_lists=2, nodes=[
1025
((b'key',), b'value', ([], [missing_key]))])
1026
self.assertEqual(set([]), index.external_references(0))
1027
self.assertEqual({missing_key}, index.external_references(1))
1029
def test_external_references_two_records(self):
1030
index = self.make_index(ref_lists=1, nodes=[
1031
((b'key-1',), b'value', ([(b'key-2',)],)),
1032
((b'key-2',), b'value', ([],)),
1034
self.assertEqual(set([]), index.external_references(0))
1036
def test__find_ancestors_one_page(self):
1039
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1040
(key1, b'value', ([key2],)),
1041
(key2, b'value', ([],)),
1044
missing_keys = set()
1045
search_keys = index._find_ancestors(
1046
[key1], 0, parent_map, missing_keys)
1047
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1048
self.assertEqual(set(), missing_keys)
1049
self.assertEqual(set(), search_keys)
1051
def test__find_ancestors_one_page_w_missing(self):
1055
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1056
(key1, b'value', ([key2],)),
1057
(key2, b'value', ([],)),
1060
missing_keys = set()
1061
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1063
self.assertEqual({key2: ()}, parent_map)
1064
# we know that key3 is missing because we read the page that it would
1066
self.assertEqual({key3}, missing_keys)
1067
self.assertEqual(set(), search_keys)
1069
def test__find_ancestors_one_parent_missing(self):
1073
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1074
(key1, b'value', ([key2],)),
1075
(key2, b'value', ([key3],)),
1078
missing_keys = set()
1079
search_keys = index._find_ancestors([key1], 0, parent_map,
1081
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1082
self.assertEqual(set(), missing_keys)
1083
# all we know is that key3 wasn't present on the page we were reading
1084
# but if you look, the last key is key2 which comes before key3, so we
1085
# don't know whether key3 would land on this page or not.
1086
self.assertEqual({key3}, search_keys)
1087
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1089
# passing it back in, we are sure it is 'missing'
1090
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1091
self.assertEqual({key3}, missing_keys)
1092
self.assertEqual(set([]), search_keys)
1094
def test__find_ancestors_dont_search_known(self):
1098
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1099
(key1, b'value', ([key2],)),
1100
(key2, b'value', ([key3],)),
1101
(key3, b'value', ([],)),
1103
# We already know about key2, so we won't try to search for key3
1104
parent_map = {key2: (key3,)}
1105
missing_keys = set()
1106
search_keys = index._find_ancestors([key1], 0, parent_map,
1108
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1109
self.assertEqual(set(), missing_keys)
1110
self.assertEqual(set(), search_keys)
1112
def test__find_ancestors_multiple_pages(self):
1113
# We need to use enough keys that we actually cause a split
1114
start_time = 1249671539
1115
email = "joebob@example.com"
1119
for i in range(400):
1120
rev_id = ('%s-%s-%s' % (email,
1121
osutils.compact_date(start_time + i),
1122
osutils.rand_chars(16))).encode('ascii')
1124
nodes.append((rev_key, b'value', ref_lists))
1125
# We have a ref 'list' of length 1, with a list of parents, with 1
1126
# parent which is a key
1127
ref_lists = ((rev_key,),)
1128
rev_keys.append(rev_key)
1129
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1130
self.assertEqual(400, index.key_count())
1131
self.assertEqual(3, len(index._row_offsets))
1132
nodes = dict(index._read_nodes([1, 2]))
1135
min_l2_key = l2.min_key
1136
max_l1_key = l1.max_key
1137
self.assertTrue(max_l1_key < min_l2_key)
1138
parents_min_l2_key = l2[min_l2_key][1][0]
1139
self.assertEqual((l1.max_key,), parents_min_l2_key)
1140
# Now, whatever key we select that would fall on the second page,
1141
# should give us all the parents until the page break
1142
key_idx = rev_keys.index(min_l2_key)
1143
next_key = rev_keys[key_idx + 1]
1144
# So now when we get the parent map, we should get the key we are
1145
# looking for, min_l2_key, and then a reference to go look for the
1146
# parent of that key
1148
missing_keys = set()
1149
search_keys = index._find_ancestors([next_key], 0, parent_map,
1151
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1152
self.assertEqual(set(), missing_keys)
1153
self.assertEqual({max_l1_key}, search_keys)
1155
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1157
self.assertEqual(l1.all_keys(), sorted(parent_map))
1158
self.assertEqual(set(), missing_keys)
1159
self.assertEqual(set(), search_keys)
1161
def test__find_ancestors_empty_index(self):
1162
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1164
missing_keys = set()
1165
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1167
self.assertEqual(set(), search_keys)
1168
self.assertEqual({}, parent_map)
1169
self.assertEqual({('one',), ('two',)}, missing_keys)
1171
def test_supports_unlimited_cache(self):
1172
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1173
# We need enough nodes to cause a page split (so we have both an
1174
# internal node and a couple leaf nodes. 500 seems to be enough.)
1175
nodes = self.make_nodes(500, 1, 0)
1177
builder.add_node(*node)
1178
stream = builder.finish()
1179
trans = self.get_transport()
1180
size = trans.put_file('index', stream)
1181
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1182
self.assertEqual(500, index.key_count())
1183
# We have an internal node
1184
self.assertEqual(2, len(index._row_lengths))
1185
# We have at least 2 leaf nodes
1186
self.assertTrue(index._row_lengths[-1] >= 2)
1187
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1188
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1189
index._leaf_node_cache._max_cache)
1190
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1191
self.assertEqual(100, index._internal_node_cache._max_cache)
1192
# No change if unlimited_cache=False is passed
1193
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1194
unlimited_cache=False)
1195
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1196
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1197
index._leaf_node_cache._max_cache)
1198
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1199
self.assertEqual(100, index._internal_node_cache._max_cache)
1200
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1201
unlimited_cache=True)
1202
self.assertIsInstance(index._leaf_node_cache, dict)
1203
self.assertIs(type(index._internal_node_cache), dict)
1204
# Exercise the lookup code
1205
entries = set(index.iter_entries([n[0] for n in nodes]))
1206
self.assertEqual(500, len(entries))
1209
class TestBTreeNodes(BTreeTestCase):
    """Tests for the leaf/internal node parsers (Python and C variants)."""

    scenarios = btreeparser_scenarios()

    def setUp(self):
        super(TestBTreeNodes, self).setUp()
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)

    def test_LeafNode_1_0(self):
        node_bytes = (b"type=leaf\n"
                      b"0000000000000000000000000000000000000000\x00\x00value:0\n"
                      b"1111111111111111111111111111111111111111\x00\x00value:1\n"
                      b"2222222222222222222222222222222222222222\x00\x00value:2\n"
                      b"3333333333333333333333333333333333333333\x00\x00value:3\n"
                      b"4444444444444444444444444444444444444444\x00\x00value:4\n")
        node = btree_index._LeafNode(node_bytes, 1, 0)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            (b"0000000000000000000000000000000000000000",): (b"value:0", ()),
            (b"1111111111111111111111111111111111111111",): (b"value:1", ()),
            (b"2222222222222222222222222222222222222222",): (b"value:2", ()),
            (b"3333333333333333333333333333333333333333",): (b"value:3", ()),
            (b"4444444444444444444444444444444444444444",): (b"value:4", ()),
            }, dict(node.all_items()))

    def test_LeafNode_2_2(self):
        node_bytes = (b"type=leaf\n"
                      b"00\x0000\x00\t00\x00ref00\x00value:0\n"
                      b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
                      b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
                      b"11\x0044\x00\t11\x00ref00\x00value:4\n"
                      b""
                      )
        node = btree_index._LeafNode(node_bytes, 2, 2)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
            (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
                                          ((b'00', b'ref00'), (b'01', b'ref01')))),
            (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
                                          ((b'11', b'ref22'), (b'11', b'ref22')))),
            (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
            }, dict(node.all_items()))

    def test_InternalNode_1(self):
        node_bytes = (b"type=internal\n"
                      b"offset=1\n"
                      b"0000000000000000000000000000000000000000\n"
                      b"1111111111111111111111111111111111111111\n"
                      b"2222222222222222222222222222222222222222\n"
                      b"3333333333333333333333333333333333333333\n"
                      b"4444444444444444444444444444444444444444\n"
                      b""
                      )
        node = btree_index._InternalNode(node_bytes)
        # We want to bisect to find the right children from this node, so a
        # vector is most useful.
        self.assertEqual([
            (b"0000000000000000000000000000000000000000",),
            (b"1111111111111111111111111111111111111111",),
            (b"2222222222222222222222222222222222222222",),
            (b"3333333333333333333333333333333333333333",),
            (b"4444444444444444444444444444444444444444",),
            ], node.keys)
        self.assertEqual(1, node.offset)

    # NOTE(review): this method name duplicates test_LeafNode_2_2 above, so
    # the earlier definition is shadowed at class-creation time — confirm
    # whether one of the two was meant to have a different name.
    def test_LeafNode_2_2(self):
        node_bytes = (b"type=leaf\n"
                      b"00\x0000\x00\t00\x00ref00\x00value:0\n"
                      b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
                      b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
                      b"11\x0044\x00\t11\x00ref00\x00value:4\n"
                      b""
                      )
        node = btree_index._LeafNode(node_bytes, 2, 2)
        # We do direct access, or don't care about order, to leaf nodes most of
        # the time, so a dict is useful:
        self.assertEqual({
            (b'00', b'00'): (b'value:0', ((), ((b'00', b'ref00'),))),
            (b'00', b'11'): (b'value:1', (((b'00', b'ref00'),),
                                          ((b'00', b'ref00'), (b'01', b'ref01')))),
            (b'11', b'33'): (b'value:3', (((b'11', b'ref22'),),
                                          ((b'11', b'ref22'), (b'11', b'ref22')))),
            (b'11', b'44'): (b'value:4', ((), ((b'11', b'ref00'),)))
            }, dict(node.all_items()))

    def assertFlattened(self, expected, key, value, refs):
        """Check _flatten_node produces the expected serialized line."""
        flat_key, flat_line = self.parse_btree._flatten_node(
            (None, key, value, refs), bool(refs))
        self.assertEqual(b'\x00'.join(key), flat_key)
        self.assertEqual(expected, flat_line)

    def test__flatten_node(self):
        self.assertFlattened(b'key\0\0value\n', (b'key',), b'value', [])
        self.assertFlattened(b'key\0tuple\0\0value str\n',
                             (b'key', b'tuple'), b'value str', [])
        self.assertFlattened(b'key\0tuple\0triple\0\0value str\n',
                             (b'key', b'tuple', b'triple'), b'value str', [])
        self.assertFlattened(b'k\0t\0s\0ref\0value str\n',
                             (b'k', b't', b's'), b'value str', [[(b'ref',)]])
        self.assertFlattened(b'key\0tuple\0ref\0key\0value str\n',
                             (b'key', b'tuple'), b'value str', [[(b'ref', b'key')]])
        self.assertFlattened(b"00\x0000\x00\t00\x00ref00\x00value:0\n",
                             (b'00', b'00'), b'value:0', ((), ((b'00', b'ref00'),)))
        self.assertFlattened(
            b"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
            (b'00', b'11'), b'value:1',
            (((b'00', b'ref00'),), ((b'00', b'ref00'), (b'01', b'ref01'))))
        self.assertFlattened(
            b"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
            (b'11', b'33'), b'value:3',
            (((b'11', b'ref22'),), ((b'11', b'ref22'), (b'11', b'ref22'))))
        self.assertFlattened(
            b"11\x0044\x00\t11\x00ref00\x00value:4\n",
            (b'11', b'44'), b'value:4', ((), ((b'11', b'ref00'),)))
class TestCompiledBtree(tests.TestCase):
    """Report clearly when the compiled btree parser is unavailable."""

    def test_exists(self):
        # This is just to let the user know if they don't have the feature
        # available
        self.requireFeature(compiled_btreeparser_feature)
class TestMultiBisectRight(tests.TestCase):
    """Tests for BTreeGraphIndex._multi_bisect_right."""

    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
        self.assertEqual(offsets,
                         btree_index.BTreeGraphIndex._multi_bisect_right(
                             search_keys, fixed_keys))

    def test_after(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])

    def test_before(self):
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])

    def test_exact(self):
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
        self.assertMultiBisectRight(
            [(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
                                    ['a', 'c'], ['a', 'b', 'c'])

    def test_inbetween(self):
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])

    def test_mixed(self):
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
                                     (4, ['g', 'h'])],
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
                                    ['c', 'd', 'f', 'g'])
class TestExpandOffsets(tests.TestCase):
1373
def make_index(self, size, recommended_pages=None):
1374
"""Make an index with a generic size.
1376
This doesn't actually create anything on disk, it just primes a
1377
BTreeGraphIndex with the recommended information.
1379
index = btree_index.BTreeGraphIndex(
1380
transport.get_transport_from_url('memory:///'),
1381
'test-index', size=size)
1382
if recommended_pages is not None:
1383
index._recommended_pages = recommended_pages
1386
def set_cached_offsets(self, index, cached_offsets):
1387
"""Monkeypatch to give a canned answer for _get_offsets_for...()."""
1388
def _get_offsets_to_cached_pages():
1389
cached = set(cached_offsets)
1391
index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1393
def prepare_index(self, index, node_ref_lists, key_length, key_count,
1394
row_lengths, cached_offsets):
1395
"""Setup the BTreeGraphIndex with some pre-canned information."""
1396
index.node_ref_lists = node_ref_lists
1397
index._key_length = key_length
1398
index._key_count = key_count
1399
index._row_lengths = row_lengths
1400
index._compute_row_offsets()
1401
index._root_node = btree_index._InternalNode(b'internal\noffset=0\n')
1402
self.set_cached_offsets(index, cached_offsets)
1404
def make_100_node_index(self):
1405
index = self.make_index(4096 * 100, 6)
1406
# Consider we've already made a single request at the middle
1407
self.prepare_index(index, node_ref_lists=0, key_length=1,
1408
key_count=1000, row_lengths=[1, 99],
1409
cached_offsets=[0, 50])
1412
def make_1000_node_index(self):
1413
index = self.make_index(4096 * 1000, 6)
1414
# Pretend we've already made a single request in the middle
1415
self.prepare_index(index, node_ref_lists=0, key_length=1,
1416
key_count=90000, row_lengths=[1, 9, 990],
1417
cached_offsets=[0, 5, 500])
1420
def assertNumPages(self, expected_pages, index, size):
1422
self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1424
def assertExpandOffsets(self, expected, index, offsets):
1425
self.assertEqual(expected, index._expand_offsets(offsets),
1426
'We did not get the expected value after expanding'
1429
def test_default_recommended_pages(self):
1430
index = self.make_index(None)
1431
# local transport recommends 4096 byte reads, which is 1 page
1432
self.assertEqual(1, index._recommended_pages)
1434
def test__compute_total_pages_in_index(self):
1435
index = self.make_index(None)
1436
self.assertNumPages(1, index, 1024)
1437
self.assertNumPages(1, index, 4095)
1438
self.assertNumPages(1, index, 4096)
1439
self.assertNumPages(2, index, 4097)
1440
self.assertNumPages(2, index, 8192)
1441
self.assertNumPages(76, index, 4096 * 75 + 10)
1443
def test__find_layer_start_and_stop(self):
1444
index = self.make_1000_node_index()
1445
self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1446
self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1447
self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1448
self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1449
self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1450
self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1452
def test_unknown_size(self):
1453
# We should not expand if we don't know the file size
1454
index = self.make_index(None, 10)
1455
self.assertExpandOffsets([0], index, [0])
1456
self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1458
def test_more_than_recommended(self):
1459
index = self.make_index(4096 * 100, 2)
1460
self.assertExpandOffsets([1, 10], index, [1, 10])
1461
self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1463
def test_read_all_from_root(self):
1464
index = self.make_index(4096 * 10, 20)
1465
self.assertExpandOffsets(list(range(10)), index, [0])
1467
def test_read_all_when_cached(self):
1468
# We've read enough that we can grab all the rest in a single request
1469
index = self.make_index(4096 * 10, 5)
1470
self.prepare_index(index, node_ref_lists=0, key_length=1,
1471
key_count=1000, row_lengths=[1, 9],
1472
cached_offsets=[0, 1, 2, 5, 6])
1473
# It should fill the remaining nodes, regardless of the one requested
1474
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1475
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1476
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1478
def test_no_root_node(self):
1479
index = self.make_index(4096 * 10, 5)
1480
self.assertExpandOffsets([0], index, [0])
1482
def test_include_neighbors(self):
1483
index = self.make_100_node_index()
1484
# We expand in both directions, until we have at least 'recommended'
1486
self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1487
self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1488
# If we hit an 'edge' we continue in the other direction
1489
self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1490
self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1492
# Requesting many nodes will expand all locations equally
1493
self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1494
self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1497
def test_stop_at_cached(self):
1498
index = self.make_100_node_index()
1499
self.set_cached_offsets(index, [0, 10, 19])
1500
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1501
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1502
self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1503
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1504
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1505
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1507
def test_cannot_fully_expand(self):
1508
index = self.make_100_node_index()
1509
self.set_cached_offsets(index, [0, 10, 12])
1510
# We don't go into an endless loop if we are bound by cached nodes
1511
self.assertExpandOffsets([11], index, [11])
1513
def test_overlap(self):
1514
index = self.make_100_node_index()
1515
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1516
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1518
def test_stay_within_layer(self):
1519
index = self.make_1000_node_index()
1520
# When expanding a request, we won't read nodes from the next layer
1521
self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1522
self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1523
self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1524
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1525
self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1527
self.set_cached_offsets(index, [0, 4, 12])
1528
self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1529
self.assertExpandOffsets([10, 11], index, [11])
1531
def test_small_requests_unexpanded(self):
1532
index = self.make_100_node_index()
1533
self.set_cached_offsets(index, [0])
1534
self.assertExpandOffsets([1], index, [1])
1535
self.assertExpandOffsets([50], index, [50])
1536
# If we request more than one node, then we'll expand
1537
self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1539
# The first pass does not expand
1540
index = self.make_1000_node_index()
1541
self.set_cached_offsets(index, [0])
1542
self.assertExpandOffsets([1], index, [1])
1543
self.set_cached_offsets(index, [0, 1])
1544
self.assertExpandOffsets([100], index, [100])
1545
self.set_cached_offsets(index, [0, 1, 100])
1546
# But after the first depth, we will expand
1547
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1548
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1549
self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1550
self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,