# Copyright (C) 2008-2011, 2016 Canonical Ltd
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from .. import (
    errors,
    osutils,
    tests,
    )
from ..bzr import (
    chk_map,
    groupcompress,
    )
from ..bzr.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )
from ..static_tuple import StaticTuple


class TestNode(tests.TestCase):
39
def assertCommonPrefix(self, expected_common, prefix, key):
40
common = Node.common_prefix(prefix, key)
41
self.assertTrue(len(common) <= len(prefix))
42
self.assertTrue(len(common) <= len(key))
43
self.assertStartsWith(prefix, common)
44
self.assertStartsWith(key, common)
45
self.assertEqual(expected_common, common)
47
def test_common_prefix(self):
48
self.assertCommonPrefix(b'beg', b'beg', b'begin')
50
def test_no_common_prefix(self):
51
self.assertCommonPrefix(b'', b'begin', b'end')
54
self.assertCommonPrefix(b'begin', b'begin', b'begin')
56
def test_not_a_prefix(self):
57
self.assertCommonPrefix(b'b', b'begin', b'b')
60
self.assertCommonPrefix(b'', b'', b'end')
61
self.assertCommonPrefix(b'', b'begin', b'')
62
self.assertCommonPrefix(b'', b'', b'')
65
class TestCaseWithStore(tests.TestCaseWithMemoryTransport):
67
def get_chk_bytes(self):
68
# This creates a standalone CHK store.
69
factory = groupcompress.make_pack_factory(False, False, 1)
70
self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
74
search_key_func=None):
        if chk_bytes is None:
chk_bytes = self.get_chk_bytes()
77
root_key = CHKMap.from_dict(chk_bytes, a_dict,
78
maximum_size=maximum_size, key_width=key_width,
79
search_key_func=search_key_func)
80
root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
81
maximum_size=maximum_size, key_width=key_width,
82
search_key_func=search_key_func)
83
self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
84
" match CHKMap._create_via_map")
85
chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap

def read_bytes(self, chk_bytes, key):
89
stream = chk_bytes.get_record_stream([key], 'unordered', True)
        record = next(stream)
if record.storage_kind == 'absent':
92
self.fail('Store does not contain the key %s' % (key,))
93
return record.get_bytes_as("fulltext")
95
def to_dict(self, node, *args):
96
return dict(node.iteritems(*args))
99
class TestCaseWithExampleMaps(TestCaseWithStore):
101
def get_chk_bytes(self):
102
if getattr(self, '_chk_bytes', None) is None:
103
self._chk_bytes = super(TestCaseWithExampleMaps,
104
self).get_chk_bytes()
105
return self._chk_bytes
107
def get_map(self, a_dict, maximum_size=100, search_key_func=None):
108
c_map = self._get_map(a_dict, maximum_size=maximum_size,
109
chk_bytes=self.get_chk_bytes(),
110
search_key_func=search_key_func)
        return c_map

def make_root_only_map(self, search_key_func=None):
114
return self.get_map({
115
(b'aaa',): b'initial aaa content',
116
(b'abb',): b'initial abb content',
117
}, search_key_func=search_key_func)
119
def make_root_only_aaa_ddd_map(self, search_key_func=None):
120
return self.get_map({
121
(b'aaa',): b'initial aaa content',
122
(b'ddd',): b'initial ddd content',
123
}, search_key_func=search_key_func)
125
def make_one_deep_map(self, search_key_func=None):
126
# Same as root_only_map, except it forces an InternalNode at the root
127
return self.get_map({
128
(b'aaa',): b'initial aaa content',
129
(b'abb',): b'initial abb content',
130
(b'ccc',): b'initial ccc content',
131
(b'ddd',): b'initial ddd content',
132
}, search_key_func=search_key_func)
134
def make_two_deep_map(self, search_key_func=None):
135
# Carefully chosen so that it creates a 2-deep map for both
136
# _search_key_plain and for _search_key_16
137
# Also so that things line up with make_one_deep_two_prefix_map
138
return self.get_map({
139
(b'aaa',): b'initial aaa content',
140
(b'abb',): b'initial abb content',
141
(b'acc',): b'initial acc content',
142
(b'ace',): b'initial ace content',
143
(b'add',): b'initial add content',
144
(b'adh',): b'initial adh content',
145
(b'adl',): b'initial adl content',
146
(b'ccc',): b'initial ccc content',
147
(b'ddd',): b'initial ddd content',
148
}, search_key_func=search_key_func)
150
def make_one_deep_two_prefix_map(self, search_key_func=None):
151
"""Create a map with one internal node, but references are extra long.

        Otherwise has similar content to make_two_deep_map.
        """
return self.get_map({
156
(b'aaa',): b'initial aaa content',
157
(b'add',): b'initial add content',
158
(b'adh',): b'initial adh content',
159
(b'adl',): b'initial adl content',
160
}, search_key_func=search_key_func)
162
def make_one_deep_one_prefix_map(self, search_key_func=None):
163
"""Create a map with one internal node, but references are extra long.

        Similar to make_one_deep_two_prefix_map, except the split is at the
        first char, rather than the second.
        """
return self.get_map({
169
(b'add',): b'initial add content',
170
(b'adh',): b'initial adh content',
171
(b'adl',): b'initial adl content',
172
(b'bbb',): b'initial bbb content',
173
}, search_key_func=search_key_func)
176
class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
177
"""Actual tests for the provided examples."""
179
def test_root_only_map_plain(self):
180
c_map = self.make_root_only_map()
181
self.assertEqualDiff(
183
" ('aaa',) 'initial aaa content'\n"
184
" ('abb',) 'initial abb content'\n",
187
def test_root_only_map_16(self):
188
c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
189
self.assertEqualDiff(
191
" ('aaa',) 'initial aaa content'\n"
192
" ('abb',) 'initial abb content'\n",
195
def test_one_deep_map_plain(self):
196
c_map = self.make_one_deep_map()
197
self.assertEqualDiff(
200
" ('aaa',) 'initial aaa content'\n"
201
" ('abb',) 'initial abb content'\n"
203
" ('ccc',) 'initial ccc content'\n"
205
" ('ddd',) 'initial ddd content'\n",
208
def test_one_deep_map_16(self):
209
c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
210
self.assertEqualDiff(
213
" ('ccc',) 'initial ccc content'\n"
215
" ('abb',) 'initial abb content'\n"
217
" ('aaa',) 'initial aaa content'\n"
218
" ('ddd',) 'initial ddd content'\n",
221
def test_root_only_aaa_ddd_plain(self):
222
c_map = self.make_root_only_aaa_ddd_map()
223
self.assertEqualDiff(
225
" ('aaa',) 'initial aaa content'\n"
226
" ('ddd',) 'initial ddd content'\n",
229
def test_root_only_aaa_ddd_16(self):
230
c_map = self.make_root_only_aaa_ddd_map(
231
search_key_func=chk_map._search_key_16)
232
# We use 'aaa' and 'ddd' because they happen to map to 'F' when using
234
self.assertEqualDiff(
236
" ('aaa',) 'initial aaa content'\n"
237
" ('ddd',) 'initial ddd content'\n",
240
def test_two_deep_map_plain(self):
241
c_map = self.make_two_deep_map()
242
self.assertEqualDiff(
244
" 'a' InternalNode\n"
246
" ('aaa',) 'initial aaa content'\n"
248
" ('abb',) 'initial abb content'\n"
250
" ('acc',) 'initial acc content'\n"
251
" ('ace',) 'initial ace content'\n"
253
" ('add',) 'initial add content'\n"
254
" ('adh',) 'initial adh content'\n"
255
" ('adl',) 'initial adl content'\n"
257
" ('ccc',) 'initial ccc content'\n"
259
" ('ddd',) 'initial ddd content'\n",
262
def test_two_deep_map_16(self):
263
c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
264
self.assertEqualDiff(
267
" ('acc',) 'initial acc content'\n"
268
" ('ccc',) 'initial ccc content'\n"
270
" ('abb',) 'initial abb content'\n"
272
" ('ace',) 'initial ace content'\n"
273
" 'F' InternalNode\n"
275
" ('aaa',) 'initial aaa content'\n"
277
" ('adl',) 'initial adl content'\n"
279
" ('adh',) 'initial adh content'\n"
281
" ('ddd',) 'initial ddd content'\n"
283
" ('add',) 'initial add content'\n",
286
def test_one_deep_two_prefix_map_plain(self):
287
c_map = self.make_one_deep_two_prefix_map()
288
self.assertEqualDiff(
291
" ('aaa',) 'initial aaa content'\n"
293
" ('add',) 'initial add content'\n"
294
" ('adh',) 'initial adh content'\n"
295
" ('adl',) 'initial adl content'\n",
298
def test_one_deep_two_prefix_map_16(self):
299
c_map = self.make_one_deep_two_prefix_map(
300
search_key_func=chk_map._search_key_16)
301
self.assertEqualDiff(
304
" ('aaa',) 'initial aaa content'\n"
306
" ('adl',) 'initial adl content'\n"
308
" ('adh',) 'initial adh content'\n"
310
" ('add',) 'initial add content'\n",
313
def test_one_deep_one_prefix_map_plain(self):
314
c_map = self.make_one_deep_one_prefix_map()
315
self.assertEqualDiff(
318
" ('add',) 'initial add content'\n"
319
" ('adh',) 'initial adh content'\n"
320
" ('adl',) 'initial adl content'\n"
322
" ('bbb',) 'initial bbb content'\n",
325
def test_one_deep_one_prefix_map_16(self):
326
c_map = self.make_one_deep_one_prefix_map(
327
search_key_func=chk_map._search_key_16)
328
self.assertEqualDiff(
331
" ('bbb',) 'initial bbb content'\n"
333
" ('add',) 'initial add content'\n"
334
" ('adh',) 'initial adh content'\n"
335
" ('adl',) 'initial adl content'\n",
339
class TestMap(TestCaseWithStore):
341
def assertHasABMap(self, chk_bytes):
342
ab_leaf_bytes = b'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
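        # Leaf record layout (as exercised by the LeafNode.deserialise tests
        # below): a b'chkleaf:' header, then maximum_size, key_width and
        # item-count lines, a common key prefix line ('a' here), and one
        # b'<key suffix>\x00<num value lines>\n<value>\n' entry per item.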
343
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
344
self.assertEqual(b'90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
345
root_key = (b'sha1:' + ab_sha1,)
346
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

def assertHasEmptyMap(self, chk_bytes):
350
empty_leaf_bytes = b'chkleaf:\n0\n1\n0\n\n'
351
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
352
self.assertEqual(b'8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
353
root_key = (b'sha1:' + empty_sha1,)
354
self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

def assertMapLayoutEqual(self, map_one, map_two):
358
"""Assert that the internal structure is identical between the maps."""
359
map_one._ensure_root()
360
node_one_stack = [map_one._root_node]
361
map_two._ensure_root()
362
node_two_stack = [map_two._root_node]
363
while node_one_stack:
364
node_one = node_one_stack.pop()
365
node_two = node_two_stack.pop()
366
if node_one.__class__ != node_two.__class__:
367
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
368
map_two._dump_tree(include_keys=True))
369
self.assertEqual(node_one._search_prefix,
370
node_two._search_prefix)
371
if isinstance(node_one, InternalNode):
372
# Internal nodes must have identical references
373
self.assertEqual(sorted(node_one._items.keys()),
374
sorted(node_two._items.keys()))
375
node_one_stack.extend(sorted(
376
[n for n, _ in node_one._iter_nodes(map_one._store)],
377
key=lambda a: a._search_prefix))
378
node_two_stack.extend(sorted(
379
[n for n, _ in node_two._iter_nodes(map_two._store)],
380
key=lambda a: a._search_prefix))
            else:
# Leaf nodes must have identical contents
383
self.assertEqual(node_one._items, node_two._items)
384
self.assertEqual([], node_two_stack)
386
def assertCanonicalForm(self, chkmap):
387
"""Assert that the chkmap is in 'canonical' form.

        We do this by adding all of the key value pairs from scratch, both in
        forward order and reverse order, and assert that the final tree layout
        is identical.
        """
items = list(chkmap.iteritems())
394
map_forward = chk_map.CHKMap(None, None)
395
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
396
for key, value in items:
397
map_forward.map(key, value)
398
self.assertMapLayoutEqual(map_forward, chkmap)
399
map_reverse = chk_map.CHKMap(None, None)
400
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
401
for key, value in reversed(items):
402
map_reverse.map(key, value)
403
self.assertMapLayoutEqual(map_reverse, chkmap)
405
def test_assert_map_layout_equal(self):
406
store = self.get_chk_bytes()
407
map_one = CHKMap(store, None)
408
map_one._root_node.set_maximum_size(20)
409
map_two = CHKMap(store, None)
410
map_two._root_node.set_maximum_size(20)
411
self.assertMapLayoutEqual(map_one, map_two)
412
map_one.map((b'aaa', ), b'value')
413
self.assertRaises(AssertionError,
414
self.assertMapLayoutEqual, map_one, map_two)
415
map_two.map((b'aaa', ), b'value')
416
self.assertMapLayoutEqual(map_one, map_two)
417
# Split the tree, so we ensure that internal nodes and leaf nodes are
419
map_one.map((b'aab', ), b'value')
420
self.assertIsInstance(map_one._root_node, InternalNode)
421
self.assertRaises(AssertionError,
422
self.assertMapLayoutEqual, map_one, map_two)
423
map_two.map((b'aab', ), b'value')
424
self.assertMapLayoutEqual(map_one, map_two)
425
map_one.map((b'aac', ), b'value')
426
self.assertRaises(AssertionError,
427
self.assertMapLayoutEqual, map_one, map_two)
428
self.assertCanonicalForm(map_one)
430
def test_from_dict_empty(self):
431
chk_bytes = self.get_chk_bytes()
432
root_key = CHKMap.from_dict(chk_bytes, {})
433
# Check the data was saved and inserted correctly.
434
expected_root_key = self.assertHasEmptyMap(chk_bytes)
435
self.assertEqual(expected_root_key, root_key)
437
def test_from_dict_ab(self):
438
chk_bytes = self.get_chk_bytes()
439
root_key = CHKMap.from_dict(chk_bytes, {(b"a", ): b"b"})
440
# Check the data was saved and inserted correctly.
441
expected_root_key = self.assertHasABMap(chk_bytes)
442
self.assertEqual(expected_root_key, root_key)
444
def test_apply_empty_ab(self):
445
# applying a delta (None, "a", "b") to an empty chkmap generates the
446
# same map as from_dict_ab.
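        # (Each delta item is an (old_key, new_key, value) triple: None as
        # old_key adds a mapping, None as new_key removes one.)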
447
chk_bytes = self.get_chk_bytes()
448
root_key = CHKMap.from_dict(chk_bytes, {})
449
chkmap = CHKMap(chk_bytes, root_key)
450
new_root = chkmap.apply_delta([(None, (b"a", ), b"b")])
451
# Check the data was saved and inserted correctly.
452
expected_root_key = self.assertHasABMap(chk_bytes)
453
self.assertEqual(expected_root_key, new_root)
454
# The update should have left us with an in memory root node, with an
        # updated key.
self.assertEqual(new_root, chkmap._root_node._key)
458
def test_apply_ab_empty(self):
459
# applying a delta ("a", None, None) to a map with 'a' in it generates
        # an empty map.
chk_bytes = self.get_chk_bytes()
462
root_key = CHKMap.from_dict(chk_bytes, {(b"a",):b"b"})
463
chkmap = CHKMap(chk_bytes, root_key)
464
new_root = chkmap.apply_delta([((b"a",), None, None)])
465
# Check the data was saved and inserted correctly.
466
expected_root_key = self.assertHasEmptyMap(chk_bytes)
467
self.assertEqual(expected_root_key, new_root)
468
# The update should have left us with an in memory root node, with an
        # updated key.
self.assertEqual(new_root, chkmap._root_node._key)
472
def test_apply_delete_to_internal_node(self):
473
        # applying a delta should convert an internal root node to a leaf
474
# node if the delta shrinks the map enough.
475
store = self.get_chk_bytes()
476
chkmap = CHKMap(store, None)
477
# Add three items: 2 small enough to fit in one node, and one huge to
478
# force multiple nodes.
479
chkmap._root_node.set_maximum_size(100)
480
chkmap.map((b'small',), b'value')
481
chkmap.map((b'little',), b'value')
482
chkmap.map((b'very-big',), b'x' * 100)
483
# (Check that we have constructed the scenario we want to test)
484
self.assertIsInstance(chkmap._root_node, InternalNode)
485
# Delete the huge item so that the map fits in one node again.
486
delta = [((b'very-big',), None, None)]
487
chkmap.apply_delta(delta)
488
self.assertCanonicalForm(chkmap)
489
self.assertIsInstance(chkmap._root_node, LeafNode)
491
def test_apply_new_keys_must_be_new(self):
492
# applying a delta (None, "a", "b") to a map with 'a' in it generates
        # an InconsistentDelta error.
chk_bytes = self.get_chk_bytes()
495
root_key = CHKMap.from_dict(chk_bytes, {(b"a",):b"b"})
496
chkmap = CHKMap(chk_bytes, root_key)
497
self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
498
[(None, (b"a",), b"b")])
499
        # As an error occurred, the update should have left us without changing
500
# anything (the root should be unchanged).
501
self.assertEqual(root_key, chkmap._root_node._key)
503
def test_apply_delta_is_deterministic(self):
504
chk_bytes = self.get_chk_bytes()
505
chkmap1 = CHKMap(chk_bytes, None)
506
chkmap1._root_node.set_maximum_size(10)
507
chkmap1.apply_delta([(None, (b'aaa',), b'common'),
508
(None, (b'bba',), b'target2'),
509
(None, (b'bbb',), b'common')])
510
root_key1 = chkmap1._save()
511
self.assertCanonicalForm(chkmap1)
513
chkmap2 = CHKMap(chk_bytes, None)
514
chkmap2._root_node.set_maximum_size(10)
515
chkmap2.apply_delta([(None, (b'bbb',), b'common'),
516
(None, (b'bba',), b'target2'),
517
(None, (b'aaa',), b'common')])
518
root_key2 = chkmap2._save()
519
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
520
chkmap2._dump_tree(include_keys=True))
521
self.assertEqual(root_key1, root_key2)
522
self.assertCanonicalForm(chkmap2)
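        # The same set of mappings must serialise to the same root key no
        # matter what order the delta applied them in; assertCanonicalForm
        # checks the equivalent property for the in-memory layout.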
524
def test_stable_splitting(self):
525
store = self.get_chk_bytes()
526
chkmap = CHKMap(store, None)
527
# Should fit 2 keys per LeafNode
528
chkmap._root_node.set_maximum_size(35)
529
chkmap.map((b'aaa',), b'v')
530
self.assertEqualDiff("'' LeafNode\n"
533
chkmap.map((b'aab',), b'v')
534
self.assertEqualDiff("'' LeafNode\n"
538
self.assertCanonicalForm(chkmap)
540
# Creates a new internal node, and splits the others into leaves
541
chkmap.map((b'aac',), b'v')
542
self.assertEqualDiff("'' InternalNode\n"
550
self.assertCanonicalForm(chkmap)
552
# Splits again, because it can't fit in the current structure
553
chkmap.map((b'bbb',), b'v')
554
self.assertEqualDiff("'' InternalNode\n"
555
" 'a' InternalNode\n"
565
self.assertCanonicalForm(chkmap)
567
def test_map_splits_with_longer_key(self):
568
store = self.get_chk_bytes()
569
chkmap = CHKMap(store, None)
570
# Should fit 1 key per LeafNode
571
chkmap._root_node.set_maximum_size(10)
572
chkmap.map((b'aaa',), b'v')
573
chkmap.map((b'aaaa',), b'v')
574
self.assertCanonicalForm(chkmap)
575
self.assertIsInstance(chkmap._root_node, InternalNode)
577
def test_with_linefeed_in_key(self):
578
store = self.get_chk_bytes()
579
chkmap = CHKMap(store, None)
580
# Should fit 1 key per LeafNode
581
chkmap._root_node.set_maximum_size(10)
582
chkmap.map((b'a\ra',), b'val1')
583
chkmap.map((b'a\rb',), b'val2')
584
chkmap.map((b'ac',), b'val3')
585
self.assertCanonicalForm(chkmap)
586
self.assertEqualDiff("'' InternalNode\n"
587
" 'a\\r' InternalNode\n"
588
" 'a\\ra' LeafNode\n"
589
" ('a\\ra',) 'val1'\n"
590
" 'a\\rb' LeafNode\n"
591
" ('a\\rb',) 'val2'\n"
595
# We should also successfully serialise and deserialise these items
596
root_key = chkmap._save()
597
chkmap = CHKMap(store, root_key)
598
self.assertEqualDiff("'' InternalNode\n"
599
" 'a\\r' InternalNode\n"
600
" 'a\\ra' LeafNode\n"
601
" ('a\\ra',) 'val1'\n"
602
" 'a\\rb' LeafNode\n"
603
" ('a\\rb',) 'val2'\n"
608
def test_deep_splitting(self):
609
store = self.get_chk_bytes()
610
chkmap = CHKMap(store, None)
611
# Should fit 2 keys per LeafNode
612
chkmap._root_node.set_maximum_size(40)
613
chkmap.map((b'aaaaaaaa',), b'v')
614
chkmap.map((b'aaaaabaa',), b'v')
615
self.assertEqualDiff("'' LeafNode\n"
616
" ('aaaaaaaa',) 'v'\n"
617
" ('aaaaabaa',) 'v'\n",
619
chkmap.map((b'aaabaaaa',), b'v')
620
chkmap.map((b'aaababaa',), b'v')
621
self.assertEqualDiff("'' InternalNode\n"
623
" ('aaaaaaaa',) 'v'\n"
624
" ('aaaaabaa',) 'v'\n"
626
" ('aaabaaaa',) 'v'\n"
627
" ('aaababaa',) 'v'\n",
629
chkmap.map((b'aaabacaa',), b'v')
630
chkmap.map((b'aaabadaa',), b'v')
631
self.assertEqualDiff("'' InternalNode\n"
633
" ('aaaaaaaa',) 'v'\n"
634
" ('aaaaabaa',) 'v'\n"
635
" 'aaab' InternalNode\n"
636
" 'aaabaa' LeafNode\n"
637
" ('aaabaaaa',) 'v'\n"
638
" 'aaabab' LeafNode\n"
639
" ('aaababaa',) 'v'\n"
640
" 'aaabac' LeafNode\n"
641
" ('aaabacaa',) 'v'\n"
642
" 'aaabad' LeafNode\n"
643
" ('aaabadaa',) 'v'\n",
645
chkmap.map((b'aaababba',), b'val')
646
chkmap.map((b'aaababca',), b'val')
647
self.assertEqualDiff("'' InternalNode\n"
649
" ('aaaaaaaa',) 'v'\n"
650
" ('aaaaabaa',) 'v'\n"
651
" 'aaab' InternalNode\n"
652
" 'aaabaa' LeafNode\n"
653
" ('aaabaaaa',) 'v'\n"
654
" 'aaabab' InternalNode\n"
655
" 'aaababa' LeafNode\n"
656
" ('aaababaa',) 'v'\n"
657
" 'aaababb' LeafNode\n"
658
" ('aaababba',) 'val'\n"
659
" 'aaababc' LeafNode\n"
660
" ('aaababca',) 'val'\n"
661
" 'aaabac' LeafNode\n"
662
" ('aaabacaa',) 'v'\n"
663
" 'aaabad' LeafNode\n"
664
" ('aaabadaa',) 'v'\n",
666
# Now we add a node that should fit around an existing InternalNode,
667
# but has a slightly different key prefix, which causes a new
        # intermediate InternalNode to be introduced.
chkmap.map((b'aaabDaaa',), b'v')
670
self.assertEqualDiff("'' InternalNode\n"
672
" ('aaaaaaaa',) 'v'\n"
673
" ('aaaaabaa',) 'v'\n"
674
" 'aaab' InternalNode\n"
675
" 'aaabD' LeafNode\n"
676
" ('aaabDaaa',) 'v'\n"
677
" 'aaaba' InternalNode\n"
678
" 'aaabaa' LeafNode\n"
679
" ('aaabaaaa',) 'v'\n"
680
" 'aaabab' InternalNode\n"
681
" 'aaababa' LeafNode\n"
682
" ('aaababaa',) 'v'\n"
683
" 'aaababb' LeafNode\n"
684
" ('aaababba',) 'val'\n"
685
" 'aaababc' LeafNode\n"
686
" ('aaababca',) 'val'\n"
687
" 'aaabac' LeafNode\n"
688
" ('aaabacaa',) 'v'\n"
689
" 'aaabad' LeafNode\n"
690
" ('aaabadaa',) 'v'\n",
693
def test_map_collapses_if_size_changes(self):
694
store = self.get_chk_bytes()
695
chkmap = CHKMap(store, None)
696
# Should fit 2 keys per LeafNode
697
chkmap._root_node.set_maximum_size(35)
698
chkmap.map((b'aaa',), b'v')
699
chkmap.map((b'aab',), b'very long value that splits')
700
self.assertEqualDiff("'' InternalNode\n"
704
" ('aab',) 'very long value that splits'\n",
706
self.assertCanonicalForm(chkmap)
707
# Now changing the value to something small should cause a rebuild
708
chkmap.map((b'aab',), b'v')
709
self.assertEqualDiff("'' LeafNode\n"
713
self.assertCanonicalForm(chkmap)
715
def test_map_double_deep_collapses(self):
716
store = self.get_chk_bytes()
717
chkmap = CHKMap(store, None)
718
# Should fit 3 small keys per LeafNode
719
chkmap._root_node.set_maximum_size(40)
720
chkmap.map((b'aaa',), b'v')
721
chkmap.map((b'aab',), b'very long value that splits')
722
chkmap.map((b'abc',), b'v')
723
self.assertEqualDiff("'' InternalNode\n"
724
" 'aa' InternalNode\n"
728
" ('aab',) 'very long value that splits'\n"
732
chkmap.map((b'aab',), b'v')
733
self.assertCanonicalForm(chkmap)
734
self.assertEqualDiff("'' LeafNode\n"
740
def test_stable_unmap(self):
741
store = self.get_chk_bytes()
742
chkmap = CHKMap(store, None)
743
# Should fit 2 keys per LeafNode
744
chkmap._root_node.set_maximum_size(35)
745
chkmap.map((b'aaa',), b'v')
746
chkmap.map((b'aab',), b'v')
747
self.assertEqualDiff("'' LeafNode\n"
751
# Creates a new internal node, and splits the others into leaves
752
chkmap.map((b'aac',), b'v')
753
self.assertEqualDiff("'' InternalNode\n"
761
self.assertCanonicalForm(chkmap)
762
        # Now let's unmap one of the keys, and assert that we collapse the
        # tree back into a single LeafNode.
chkmap.unmap((b'aac',))
765
self.assertEqualDiff("'' LeafNode\n"
769
self.assertCanonicalForm(chkmap)
771
def test_unmap_double_deep(self):
772
store = self.get_chk_bytes()
773
chkmap = CHKMap(store, None)
774
# Should fit 3 keys per LeafNode
775
chkmap._root_node.set_maximum_size(40)
776
chkmap.map((b'aaa',), b'v')
777
chkmap.map((b'aaab',), b'v')
778
chkmap.map((b'aab',), b'very long value')
779
chkmap.map((b'abc',), b'v')
780
self.assertEqualDiff("'' InternalNode\n"
781
" 'aa' InternalNode\n"
786
" ('aab',) 'very long value'\n"
790
# Removing the 'aab' key should cause everything to collapse back to a
        # single LeafNode.
chkmap.unmap((b'aab',))
793
self.assertEqualDiff("'' LeafNode\n"
799
def test_unmap_double_deep_non_empty_leaf(self):
800
store = self.get_chk_bytes()
801
chkmap = CHKMap(store, None)
802
# Should fit 3 keys per LeafNode
803
chkmap._root_node.set_maximum_size(40)
804
chkmap.map((b'aaa',), b'v')
805
chkmap.map((b'aab',), b'long value')
806
chkmap.map((b'aabb',), b'v')
807
chkmap.map((b'abc',), b'v')
808
self.assertEqualDiff("'' InternalNode\n"
809
" 'aa' InternalNode\n"
813
" ('aab',) 'long value'\n"
818
# Removing the 'aab' key should cause everything to collapse back to a
        # single LeafNode.
chkmap.unmap((b'aab',))
821
self.assertEqualDiff("'' LeafNode\n"
827
def test_unmap_with_known_internal_node_doesnt_page(self):
828
store = self.get_chk_bytes()
829
chkmap = CHKMap(store, None)
830
# Should fit 3 keys per LeafNode
831
chkmap._root_node.set_maximum_size(30)
832
chkmap.map((b'aaa',), b'v')
833
chkmap.map((b'aab',), b'v')
834
chkmap.map((b'aac',), b'v')
835
chkmap.map((b'abc',), b'v')
836
chkmap.map((b'acd',), b'v')
837
self.assertEqualDiff("'' InternalNode\n"
838
" 'aa' InternalNode\n"
850
# Save everything to the map, and start over
851
chkmap = CHKMap(store, chkmap._save())
852
# Mapping an 'aa' key loads the internal node, but should not map the
853
# 'ab' and 'ac' nodes
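        # (A child that has not been read yet is represented by its
        # StaticTuple key; once loaded it is replaced by the node object,
        # which is what the assertIsInstance checks below distinguish.)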
854
chkmap.map((b'aad',), b'v')
855
self.assertIsInstance(chkmap._root_node._items[b'aa'], InternalNode)
856
self.assertIsInstance(chkmap._root_node._items[b'ab'], StaticTuple)
857
self.assertIsInstance(chkmap._root_node._items[b'ac'], StaticTuple)
858
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
        # to page in anything else.
chkmap.unmap((b'acd',))
861
self.assertIsInstance(chkmap._root_node._items[b'aa'], InternalNode)
862
self.assertIsInstance(chkmap._root_node._items[b'ab'], StaticTuple)
864
def test_unmap_without_fitting_doesnt_page_in(self):
865
store = self.get_chk_bytes()
866
chkmap = CHKMap(store, None)
867
# Should fit 2 keys per LeafNode
868
chkmap._root_node.set_maximum_size(20)
869
chkmap.map((b'aaa',), b'v')
870
chkmap.map((b'aab',), b'v')
871
self.assertEqualDiff("'' InternalNode\n"
877
# Save everything to the map, and start over
878
chkmap = CHKMap(store, chkmap._save())
879
chkmap.map((b'aac',), b'v')
880
chkmap.map((b'aad',), b'v')
881
chkmap.map((b'aae',), b'v')
882
chkmap.map((b'aaf',), b'v')
883
# At this point, the previous nodes should not be paged in, but the
884
# newly added nodes would be
885
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
886
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
887
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
888
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
889
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
890
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
891
# Now unmapping one of the new nodes will use only the already-paged-in
892
# nodes to determine that we don't need to do more.
893
chkmap.unmap((b'aaf',))
894
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
895
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
896
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
897
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
898
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
900
def test_unmap_pages_in_if_necessary(self):
901
store = self.get_chk_bytes()
902
chkmap = CHKMap(store, None)
903
# Should fit 2 keys per LeafNode
904
chkmap._root_node.set_maximum_size(30)
905
chkmap.map((b'aaa',), b'val')
906
chkmap.map((b'aab',), b'val')
907
chkmap.map((b'aac',), b'val')
908
self.assertEqualDiff("'' InternalNode\n"
916
root_key = chkmap._save()
917
# Save everything to the map, and start over
918
chkmap = CHKMap(store, root_key)
919
chkmap.map((b'aad',), b'v')
920
# At this point, the previous nodes should not be paged in, but the
921
# newly added node would be
922
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
923
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
924
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
925
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
926
# Unmapping the new node will check the existing nodes to see if they
        # would fit back into a single LeafNode.
# Clear the page cache so we ensure we have to read all the children
929
chk_map.clear_cache()
930
chkmap.unmap((b'aad',))
931
self.assertIsInstance(chkmap._root_node._items[b'aaa'], LeafNode)
932
self.assertIsInstance(chkmap._root_node._items[b'aab'], LeafNode)
933
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
935
def test_unmap_pages_in_from_page_cache(self):
936
store = self.get_chk_bytes()
937
chkmap = CHKMap(store, None)
938
# Should fit 2 keys per LeafNode
939
chkmap._root_node.set_maximum_size(30)
940
chkmap.map((b'aaa',), b'val')
941
chkmap.map((b'aab',), b'val')
942
chkmap.map((b'aac',), b'val')
943
root_key = chkmap._save()
944
# Save everything to the map, and start over
945
chkmap = CHKMap(store, root_key)
946
chkmap.map((b'aad',), b'val')
947
self.assertEqualDiff("'' InternalNode\n"
957
# Save everything to the map, start over after _dump_tree
958
chkmap = CHKMap(store, root_key)
959
chkmap.map((b'aad',), b'v')
960
# At this point, the previous nodes should not be paged in, but the
961
# newly added node would be
962
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
963
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
964
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
965
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
966
# Now clear the page cache, and only include 2 of the children in the
        # cache.
aab_key = chkmap._root_node._items[b'aab']
969
aab_bytes = chk_map._get_cache()[aab_key]
970
aac_key = chkmap._root_node._items[b'aac']
971
aac_bytes = chk_map._get_cache()[aac_key]
972
chk_map.clear_cache()
973
chk_map._get_cache()[aab_key] = aab_bytes
974
chk_map._get_cache()[aac_key] = aac_bytes
976
# Unmapping the new node will check the nodes from the page cache
977
# first, and not have to read in 'aaa'
978
chkmap.unmap((b'aad',))
979
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
980
self.assertIsInstance(chkmap._root_node._items[b'aab'], LeafNode)
981
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
983
def test_unmap_uses_existing_items(self):
984
store = self.get_chk_bytes()
985
chkmap = CHKMap(store, None)
986
# Should fit 2 keys per LeafNode
987
chkmap._root_node.set_maximum_size(30)
988
chkmap.map((b'aaa',), b'val')
989
chkmap.map((b'aab',), b'val')
990
chkmap.map((b'aac',), b'val')
991
root_key = chkmap._save()
992
# Save everything to the map, and start over
993
chkmap = CHKMap(store, root_key)
994
chkmap.map((b'aad',), b'val')
995
chkmap.map((b'aae',), b'val')
996
chkmap.map((b'aaf',), b'val')
997
# At this point, the previous nodes should not be paged in, but the
998
# newly added node would be
999
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
1000
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
1001
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
1002
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
1003
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
1004
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
1006
# Unmapping a new node will see the other nodes that are already in
1007
# memory, and not need to page in anything else
1008
chkmap.unmap((b'aad',))
1009
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
1010
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
1011
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
1012
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
1013
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
1015
def test_iter_changes_empty_ab(self):
1016
# Asking for changes between an empty dict to a dict with keys returns
1018
basis = self._get_map({}, maximum_size=10)
1019
target = self._get_map(
1020
{(b'a',): b'content here', (b'b',): b'more content'},
1021
chk_bytes=basis._store, maximum_size=10)
1022
self.assertEqual([((b'a',), None, b'content here'),
1023
((b'b',), None, b'more content')],
1024
sorted(list(target.iter_changes(basis))))
1026
def test_iter_changes_ab_empty(self):
1027
# Asking for changes between a dict with keys to an empty dict returns
1029
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1031
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1032
self.assertEqual([((b'a',), b'content here', None),
1033
((b'b',), b'more content', None)],
1034
sorted(list(target.iter_changes(basis))))
1036
def test_iter_changes_empty_empty_is_empty(self):
1037
basis = self._get_map({}, maximum_size=10)
1038
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1039
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1041
def test_iter_changes_ab_ab_is_empty(self):
1042
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1044
target = self._get_map(
1045
{(b'a',): b'content here', (b'b',): b'more content'},
1046
chk_bytes=basis._store, maximum_size=10)
1047
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1049
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1050
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1052
target = self._get_map(
1053
{(b'a',): b'content here', (b'b',): b'more content'},
1054
chk_bytes=basis._store, maximum_size=10)
1055
list(target.iter_changes(basis))
1056
self.assertIsInstance(target._root_node, StaticTuple)
1057
self.assertIsInstance(basis._root_node, StaticTuple)
1059
def test_iter_changes_ab_ab_changed_values_shown(self):
1060
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1062
target = self._get_map(
1063
{(b'a',): b'content here', (b'b',): b'different content'},
1064
chk_bytes=basis._store, maximum_size=10)
1065
result = sorted(list(target.iter_changes(basis)))
1066
self.assertEqual([((b'b',), b'more content', b'different content')],
1069
def test_iter_changes_mixed_node_length(self):
1070
# When one side has different node lengths than the other, common
1071
        # but different keys still need to be shown, and new-and-old included
1073
# aaa - common unaltered
1074
# aab - common altered
1078
# aaa to be not loaded (later test)
1079
# aab, b, at to be returned.
1080
        # basis splits at byte 0,1,2, aaa is common, b is basis only
1081
basis_dict = {(b'aaa',): b'foo bar',
1082
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1083
# target splits at byte 1,2, at is target only
1084
target_dict = {(b'aaa',): b'foo bar',
1085
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1087
((b'aab',), b'common altered a', b'common altered b'),
1088
((b'at',), None, b'foo bar t'),
1089
((b'b',), b'foo bar b', None),
1091
basis = self._get_map(basis_dict, maximum_size=10)
1092
target = self._get_map(target_dict, maximum_size=10,
1093
chk_bytes=basis._store)
1094
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1096
def test_iter_changes_common_pages_not_loaded(self):
1097
# aaa - common unaltered
1098
# aab - common altered
1102
# aaa to be not loaded
1103
# aaa not to be in result.
1104
basis_dict = {(b'aaa',): b'foo bar',
1105
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1106
# target splits at byte 1, at is target only
1107
target_dict = {(b'aaa',): b'foo bar',
1108
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1109
basis = self._get_map(basis_dict, maximum_size=10)
1110
target = self._get_map(target_dict, maximum_size=10,
1111
chk_bytes=basis._store)
1112
basis_get = basis._store.get_record_stream
1113
def get_record_stream(keys, order, fulltext):
1114
if (b'sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1115
raise AssertionError("'aaa' pointer was followed %r" % keys)
1116
return basis_get(keys, order, fulltext)
1117
basis._store.get_record_stream = get_record_stream
1118
result = sorted(list(target.iter_changes(basis)))
1119
for change in result:
1120
if change[0] == (b'aaa',):
1121
self.fail("Found unexpected change: %s" % change)
1123
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
1124
        # Within a leaf there are no hashes to exclude keys, make sure multi
1125
# value leaf nodes are handled well.
1126
basis_dict = {(b'aaa',): b'foo bar',
1127
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1128
target_dict = {(b'aaa',): b'foo bar',
1129
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1131
((b'aab',), b'common altered a', b'common altered b'),
1132
((b'at',), None, b'foo bar t'),
1133
((b'b',), b'foo bar b', None),
1135
basis = self._get_map(basis_dict)
1136
target = self._get_map(target_dict, chk_bytes=basis._store)
1137
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1139
def test_iteritems_empty(self):
1140
chk_bytes = self.get_chk_bytes()
1141
root_key = CHKMap.from_dict(chk_bytes, {})
1142
chkmap = CHKMap(chk_bytes, root_key)
1143
self.assertEqual([], list(chkmap.iteritems()))
1145
def test_iteritems_two_items(self):
1146
chk_bytes = self.get_chk_bytes()
1147
root_key = CHKMap.from_dict(chk_bytes,
1148
{(b"a", ): b"content here", (b"b", ): b"more content"})
1149
chkmap = CHKMap(chk_bytes, root_key)
1150
self.assertEqual([((b"a",), b"content here"), ((b"b",), b"more content")],
1151
sorted(list(chkmap.iteritems())))
1153
def test_iteritems_selected_one_of_two_items(self):
1154
chkmap = self._get_map({(b"a",):b"content here", (b"b",):b"more content"})
1155
self.assertEqual({(b"a",): b"content here"},
1156
self.to_dict(chkmap, [(b"a",)]))
1158
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1159
chkmap = self._get_map(
1160
{(b"a", b"a"): b"content here", (b"a", b"b",):b"more content",
1161
(b"b", b""): b'boring content'},
1162
maximum_size=10, key_width=2)
1164
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1165
self.to_dict(chkmap, [(b"a",)]))
1167
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1168
search_key_func = chk_map.search_key_registry.get(b'hash-16-way')
1169
self.assertEqual(b'E8B7BE43\x00E8B7BE43',
1170
search_key_func(StaticTuple(b'a', b'a')))
1171
self.assertEqual(b'E8B7BE43\x0071BEEFF9',
1172
search_key_func(StaticTuple(b'a', b'b')))
1173
self.assertEqual(b'71BEEFF9\x0000000000',
1174
search_key_func(StaticTuple(b'b', b'')))
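        # hash-16-way hashes each key element separately and joins the hex
        # digests with \x00, which is why the repeated element in ('a', 'a')
        # produces a repeated digest above.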
1175
chkmap = self._get_map(
1176
{(b"a", b"a"): b"content here", (b"a", b"b",): b"more content",
1177
(b"b", b""): b'boring content'},
1178
maximum_size=10, key_width=2, search_key_func=search_key_func)
1180
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1181
self.to_dict(chkmap, [(b"a",)]))
1183
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1184
chkmap = self._get_map(
1185
{(b"a", b"a"):b"content here", (b"a", b"b",): b"more content",
1186
(b"b", b""): b'boring content'}, key_width=2)
1188
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1189
self.to_dict(chkmap, [(b"a",)]))
1191
def test___len__empty(self):
1192
chkmap = self._get_map({})
1193
self.assertEqual(0, len(chkmap))
1195
def test___len__2(self):
1196
chkmap = self._get_map({(b"foo",): b"bar", (b"gam",): b"quux"})
1197
self.assertEqual(2, len(chkmap))
1199
def test_max_size_100_bytes_new(self):
1200
# When there is a 100 byte upper node limit, a tree is formed.
1201
chkmap = self._get_map({(b"k1"*50,):b"v1", (b"k2"*50,):b"v2"}, maximum_size=100)
1202
# We expect three nodes:
1203
# A root, with two children, and with two key prefixes - k1 to one, and
1204
# k2 to the other as our node splitting is only just being developed.
1205
# The maximum size should be embedded
1206
chkmap._ensure_root()
1207
self.assertEqual(100, chkmap._root_node.maximum_size)
1208
self.assertEqual(1, chkmap._root_node._key_width)
1209
# There should be two child nodes, and prefix of 2(bytes):
1210
self.assertEqual(2, len(chkmap._root_node._items))
1211
self.assertEqual(b"k", chkmap._root_node._compute_search_prefix())
1212
# The actual nodes pointed at will change as serialisers change; so
1213
# here we test that the key prefix is correct; then load the nodes and
1214
# check they have the right pointed at key; whether they have the
1215
# pointed at value inline or not is also unrelated to this test so we
1216
# don't check that in detail - rather we just check the aggregate
        # value.
nodes = sorted(chkmap._root_node._items.items())
        ptr1 = nodes[0]
        ptr2 = nodes[1]
self.assertEqual(b'k1', ptr1[0])
1222
self.assertEqual(b'k2', ptr2[0])
1223
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1224
self.assertIsInstance(node1, LeafNode)
1225
self.assertEqual(1, len(node1))
1226
self.assertEqual({(b'k1'*50,): b'v1'}, self.to_dict(node1, chkmap._store))
1227
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1228
self.assertIsInstance(node2, LeafNode)
1229
self.assertEqual(1, len(node2))
1230
self.assertEqual({(b'k2'*50,): b'v2'}, self.to_dict(node2, chkmap._store))
1231
# Having checked we have a good structure, check that the content is
1233
self.assertEqual(2, len(chkmap))
1234
self.assertEqual({(b"k1"*50,): b"v1", (b"k2"*50,): b"v2"},
1235
self.to_dict(chkmap))
1237
def test_init_root_is_LeafNode_new(self):
1238
chk_bytes = self.get_chk_bytes()
1239
chkmap = CHKMap(chk_bytes, None)
1240
self.assertIsInstance(chkmap._root_node, LeafNode)
1241
self.assertEqual({}, self.to_dict(chkmap))
1242
self.assertEqual(0, len(chkmap))
1244
def test_init_and_save_new(self):
1245
chk_bytes = self.get_chk_bytes()
1246
chkmap = CHKMap(chk_bytes, None)
1247
key = chkmap._save()
1248
leaf_node = LeafNode()
1249
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1251
def test_map_first_item_new(self):
1252
chk_bytes = self.get_chk_bytes()
1253
chkmap = CHKMap(chk_bytes, None)
1254
chkmap.map((b"foo,",), b"bar")
1255
self.assertEqual({(b'foo,',): b'bar'}, self.to_dict(chkmap))
1256
self.assertEqual(1, len(chkmap))
1257
key = chkmap._save()
1258
leaf_node = LeafNode()
1259
leaf_node.map(chk_bytes, (b"foo,",), b"bar")
1260
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1262
def test_unmap_last_item_root_is_leaf_new(self):
1263
chkmap = self._get_map({(b"k1"*50,): b"v1", (b"k2"*50,): b"v2"})
1264
chkmap.unmap((b"k1"*50,))
1265
chkmap.unmap((b"k2"*50,))
1266
self.assertEqual(0, len(chkmap))
1267
self.assertEqual({}, self.to_dict(chkmap))
1268
key = chkmap._save()
1269
leaf_node = LeafNode()
1270
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1272
def test__dump_tree(self):
1273
chkmap = self._get_map({(b"aaa",): b"value1", (b"aab",): b"value2",
1274
(b"bbb",): b"value3",},
1276
self.assertEqualDiff("'' InternalNode\n"
1277
" 'a' InternalNode\n"
1279
" ('aaa',) 'value1'\n"
1281
" ('aab',) 'value2'\n"
1283
" ('bbb',) 'value3'\n",
1284
chkmap._dump_tree())
1285
self.assertEqualDiff("'' InternalNode\n"
1286
" 'a' InternalNode\n"
1288
" ('aaa',) 'value1'\n"
1290
" ('aab',) 'value2'\n"
1292
" ('bbb',) 'value3'\n",
1293
chkmap._dump_tree())
1294
self.assertEqualDiff(
1295
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1296
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1297
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1298
" ('aaa',) 'value1'\n"
1299
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1300
" ('aab',) 'value2'\n"
1301
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1302
" ('bbb',) 'value3'\n",
1303
chkmap._dump_tree(include_keys=True))
1305
def test__dump_tree_in_progress(self):
1306
chkmap = self._get_map({(b"aaa",): b"value1", (b"aab",): b"value2"},
1308
chkmap.map((b'bbb',), b'value3')
1309
self.assertEqualDiff("'' InternalNode\n"
1310
" 'a' InternalNode\n"
1312
" ('aaa',) 'value1'\n"
1314
" ('aab',) 'value2'\n"
1316
" ('bbb',) 'value3'\n",
1317
chkmap._dump_tree())
1318
# For things that are updated by adding 'bbb', we don't have a sha key
1319
# for them yet, so they are listed as None
1320
self.assertEqualDiff(
1321
"'' InternalNode None\n"
1322
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1323
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1324
" ('aaa',) 'value1'\n"
1325
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1326
" ('aab',) 'value2'\n"
1327
" 'b' LeafNode None\n"
1328
" ('bbb',) 'value3'\n",
1329
chkmap._dump_tree(include_keys=True))
1332
def _search_key_single(key):
1333
"""A search key function that maps all nodes to the same value"""
    # Any constant value works; every key must map to the same search key.
    return b'x'

def _test_search_key(key):
1337
return b'test:' + b'\x00'.join(key)
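
# A search key function maps a key tuple to the byte string that node
# prefixes are derived from: _test_search_key makes that mapping visible in
# _dump_tree() output, while _search_key_single forces every key into one
# node, which test_search_key_collisions relies on.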
1340
class TestMapSearchKeys(TestCaseWithStore):
1342
def test_default_chk_map_uses_flat_search_key(self):
1343
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1344
self.assertEqual(b'1',
1345
chkmap._search_key_func((b'1',)))
1346
self.assertEqual(b'1\x002',
1347
chkmap._search_key_func((b'1', b'2')))
1348
self.assertEqual(b'1\x002\x003',
1349
chkmap._search_key_func((b'1', b'2', b'3')))
1351
def test_search_key_is_passed_to_root_node(self):
1352
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1353
search_key_func=_test_search_key)
1354
self.assertIs(_test_search_key, chkmap._search_key_func)
1355
self.assertEqual(b'test:1\x002\x003',
1356
chkmap._search_key_func((b'1', b'2', b'3')))
1357
self.assertEqual(b'test:1\x002\x003',
1358
chkmap._root_node._search_key((b'1', b'2', b'3')))
1360
def test_search_key_passed_via__ensure_root(self):
1361
chk_bytes = self.get_chk_bytes()
1362
chkmap = chk_map.CHKMap(chk_bytes, None,
1363
search_key_func=_test_search_key)
1364
root_key = chkmap._save()
1365
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1366
search_key_func=_test_search_key)
1367
chkmap._ensure_root()
1368
self.assertEqual(b'test:1\x002\x003',
1369
chkmap._root_node._search_key((b'1', b'2', b'3')))
1371
def test_search_key_with_internal_node(self):
1372
chk_bytes = self.get_chk_bytes()
1373
chkmap = chk_map.CHKMap(chk_bytes, None,
1374
search_key_func=_test_search_key)
1375
chkmap._root_node.set_maximum_size(10)
1376
chkmap.map((b'1',), b'foo')
1377
chkmap.map((b'2',), b'bar')
1378
chkmap.map((b'3',), b'baz')
1379
self.assertEqualDiff("'' InternalNode\n"
1380
" 'test:1' LeafNode\n"
1382
" 'test:2' LeafNode\n"
1384
" 'test:3' LeafNode\n"
1386
, chkmap._dump_tree())
1387
root_key = chkmap._save()
1388
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1389
search_key_func=_test_search_key)
1390
self.assertEqualDiff("'' InternalNode\n"
1391
" 'test:1' LeafNode\n"
1393
" 'test:2' LeafNode\n"
1395
" 'test:3' LeafNode\n"
1397
, chkmap._dump_tree())
1399
def test_search_key_16(self):
1400
chk_bytes = self.get_chk_bytes()
1401
chkmap = chk_map.CHKMap(chk_bytes, None,
1402
search_key_func=chk_map._search_key_16)
1403
chkmap._root_node.set_maximum_size(10)
1404
chkmap.map((b'1',), b'foo')
1405
chkmap.map((b'2',), b'bar')
1406
chkmap.map((b'3',), b'baz')
1407
self.assertEqualDiff("'' InternalNode\n"
1414
, chkmap._dump_tree())
1415
root_key = chkmap._save()
1416
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1417
search_key_func=chk_map._search_key_16)
1418
# We can get the values back correctly
1419
self.assertEqual([((b'1',), b'foo')],
1420
list(chkmap.iteritems([(b'1',)])))
1421
self.assertEqualDiff("'' InternalNode\n"
1428
, chkmap._dump_tree())
1430
def test_search_key_255(self):
1431
chk_bytes = self.get_chk_bytes()
1432
chkmap = chk_map.CHKMap(chk_bytes, None,
1433
search_key_func=chk_map._search_key_255)
1434
chkmap._root_node.set_maximum_size(10)
1435
chkmap.map((b'1',), b'foo')
1436
chkmap.map((b'2',), b'bar')
1437
chkmap.map((b'3',), b'baz')
1438
self.assertEqualDiff("'' InternalNode\n"
1439
" '\\x1a' LeafNode\n"
1443
" '\\x83' LeafNode\n"
1445
, chkmap._dump_tree(encoding='latin1'))
1446
root_key = chkmap._save()
1447
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1448
search_key_func=chk_map._search_key_255)
1449
# We can get the values back correctly
1450
self.assertEqual([((b'1',), b'foo')],
1451
list(chkmap.iteritems([(b'1',)])))
1452
self.assertEqualDiff("'' InternalNode\n"
1453
" '\\x1a' LeafNode\n"
1457
" '\\x83' LeafNode\n"
1459
, chkmap._dump_tree(encoding='latin1'))
1461
def test_search_key_collisions(self):
1462
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1463
search_key_func=_search_key_single)
1464
# The node will want to expand, but it cannot, because it knows that
1465
# all the keys must map to this node
1466
chkmap._root_node.set_maximum_size(20)
1467
chkmap.map((b'1',), b'foo')
1468
chkmap.map((b'2',), b'bar')
1469
chkmap.map((b'3',), b'baz')
1470
self.assertEqualDiff("'' LeafNode\n"
1474
, chkmap._dump_tree())
1477
class TestLeafNode(TestCaseWithStore):
1479
def test_current_size_empty(self):
        node = LeafNode()
self.assertEqual(16, node._current_size())
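        # (16 == len(b'chkleaf:\n0\n1\n0\n\n'); the next two tests show that a
        # two-digit maximum_size or key width adds one byte.)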
1483
def test_current_size_size_changed(self):
        node = LeafNode()
node.set_maximum_size(10)
1486
self.assertEqual(17, node._current_size())
1488
def test_current_size_width_changed(self):
        node = LeafNode()
node._key_width = 10
1491
self.assertEqual(17, node._current_size())
1493
def test_current_size_items(self):
        node = LeafNode()
base_size = node._current_size()
1496
node.map(None, (b"foo bar",), b"baz")
1497
self.assertEqual(base_size + 14, node._current_size())
1499
def test_deserialise_empty(self):
1500
node = LeafNode.deserialise(b"chkleaf:\n10\n1\n0\n\n", (b"sha1:1234",))
1501
self.assertEqual(0, len(node))
1502
self.assertEqual(10, node.maximum_size)
1503
self.assertEqual((b"sha1:1234",), node.key())
1504
self.assertIs(None, node._search_prefix)
1505
self.assertIs(None, node._common_serialised_prefix)
1507
def test_deserialise_items(self):
1508
node = LeafNode.deserialise(
1509
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1512
self.assertEqual([((b"foo bar",), b"baz"), ((b"quux",), b"blarh")],
1513
sorted(node.iteritems(None)))
1515
def test_deserialise_item_with_null_width_1(self):
1516
node = LeafNode.deserialise(
1517
b"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1520
self.assertEqual([((b"foo",), b"bar\x00baz"), ((b"quux",), b"blarh")],
1521
sorted(node.iteritems(None)))
1523
def test_deserialise_item_with_null_width_2(self):
1524
node = LeafNode.deserialise(
1525
b"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1526
b"quux\x00\x001\nblarh\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1529
self.assertEqual([((b"foo", b"1"), b"bar\x00baz"), ((b"quux", b""), b"blarh")],
1530
sorted(node.iteritems(None)))
1532
def test_iteritems_selected_one_of_two_items(self):
1533
node = LeafNode.deserialise(
1534
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1537
self.assertEqual([((b"quux",), b"blarh")],
1538
sorted(node.iteritems(None, [(b"quux",), (b"qaz",)])))
1540
def test_deserialise_item_with_common_prefix(self):
1541
node = LeafNode.deserialise(
1542
b"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1545
self.assertEqual([((b"foo", b"1"), b"bar\x00baz"), ((b"foo", b"2"), b"blarh")],
1546
sorted(node.iteritems(None)))
1547
self.assertIs(chk_map._unknown, node._search_prefix)
1548
self.assertEqual(b'foo\x00', node._common_serialised_prefix)
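        # The serialised common prefix is taken straight from the record
        # header, while the search prefix is left as _unknown until a caller
        # actually needs it.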
1550
def test_deserialise_multi_line(self):
1551
node = LeafNode.deserialise(
1552
b"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
            (b"sha1:1234",))
self.assertEqual(2, len(node))
1555
self.assertEqual([((b"foo", b"1"), b"bar\nbaz"),
1556
((b"foo", b"2"), b"blarh\n"),
1557
], sorted(node.iteritems(None)))
1558
self.assertIs(chk_map._unknown, node._search_prefix)
1559
self.assertEqual(b'foo\x00', node._common_serialised_prefix)
1561
def test_key_new(self):
        node = LeafNode()
self.assertEqual(None, node.key())
1565
def test_key_after_map(self):
1566
node = LeafNode.deserialise(b"chkleaf:\n10\n1\n0\n\n", (b"sha1:1234",))
1567
node.map(None, (b"foo bar",), b"baz quux")
1568
self.assertEqual(None, node.key())
1570
def test_key_after_unmap(self):
1571
node = LeafNode.deserialise(
1572
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            (b"sha1:1234",))
node.unmap(None, (b"foo bar",))
1575
self.assertEqual(None, node.key())
1577
def test_map_exceeding_max_size_only_entry_new(self):
        node = LeafNode()
node.set_maximum_size(10)
1580
result = node.map(None, (b"foo bar",), b"baz quux")
1581
self.assertEqual((b"foo bar", [(b"", node)]), result)
1582
self.assertTrue(10 < node._current_size())
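        # map() reports (common search prefix, [(prefix, node), ...]); with a
        # single over-sized entry there is nothing to split, so the node is
        # returned as-is.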
1584
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
        node = LeafNode()
node.set_maximum_size(10)
1587
node.map(None, (b"foo bar",), b"baz quux")
1588
prefix, result = list(node.map(None, (b"blue",), b"red"))
1589
self.assertEqual(b"", prefix)
1590
self.assertEqual(2, len(result))
1591
split_chars = {result[0][0], result[1][0]}
1592
self.assertEqual({b"f", b"b"}, split_chars)
1593
nodes = dict(result)
        node = nodes[b"f"]
self.assertEqual({(b"foo bar",): b"baz quux"}, self.to_dict(node, None))
1596
self.assertEqual(10, node.maximum_size)
1597
self.assertEqual(1, node._key_width)
        node = nodes[b"b"]
self.assertEqual({(b"blue",): b"red"}, self.to_dict(node, None))
1600
self.assertEqual(10, node.maximum_size)
1601
self.assertEqual(1, node._key_width)
1603
def test_map_first(self):
        node = LeafNode()
result = node.map(None, (b"foo bar",), b"baz quux")
1606
self.assertEqual((b"foo bar", [(b"", node)]), result)
1607
self.assertEqual({(b"foo bar",):b"baz quux"}, self.to_dict(node, None))
1608
self.assertEqual(1, len(node))
1610
def test_map_second(self):
        node = LeafNode()
node.map(None, (b"foo bar",), b"baz quux")
1613
result = node.map(None, (b"bingo",), b"bango")
1614
self.assertEqual((b"", [(b"", node)]), result)
1615
self.assertEqual({(b"foo bar",): b"baz quux", (b"bingo",): b"bango"},
1616
self.to_dict(node, None))
1617
self.assertEqual(2, len(node))
1619
def test_map_replacement(self):
        node = LeafNode()
node.map(None, (b"foo bar",), b"baz quux")
1622
result = node.map(None, (b"foo bar",), b"bango")
1623
self.assertEqual((b"foo bar", [(b"", node)]), result)
1624
self.assertEqual({(b"foo bar",): b"bango"},
1625
self.to_dict(node, None))
1626
self.assertEqual(1, len(node))
1628
def test_serialise_empty(self):
1629
store = self.get_chk_bytes()
        node = LeafNode()
node.set_maximum_size(10)
1632
expected_key = (b"sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1633
self.assertEqual([expected_key],
1634
list(node.serialise(store)))
1635
self.assertEqual(b"chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1636
self.assertEqual(expected_key, node.key())
1638
def test_serialise_items(self):
1639
store = self.get_chk_bytes()
        node = LeafNode()
node.set_maximum_size(10)
1642
node.map(None, (b"foo bar",), b"baz quux")
1643
expected_key = (b"sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1644
self.assertEqual(b'foo bar', node._common_serialised_prefix)
1645
self.assertEqual([expected_key],
1646
list(node.serialise(store)))
1647
self.assertEqual(b"chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1648
self.read_bytes(store, expected_key))
1649
self.assertEqual(expected_key, node.key())
1651
def test_unique_serialised_prefix_empty_new(self):
        node = LeafNode()
self.assertIs(None, node._compute_search_prefix())
1655
def test_unique_serialised_prefix_one_item_new(self):
        node = LeafNode()
node.map(None, (b"foo bar", b"baz"), b"baz quux")
1658
self.assertEqual(b"foo bar\x00baz", node._compute_search_prefix())
1660
def test_unmap_missing(self):
        node = LeafNode()
self.assertRaises(KeyError, node.unmap, None, (b"foo bar",))
1664
def test_unmap_present(self):
        node = LeafNode()
node.map(None, (b"foo bar",), b"baz quux")
1667
result = node.unmap(None, (b"foo bar",))
1668
self.assertEqual(node, result)
1669
self.assertEqual({}, self.to_dict(node, None))
1670
self.assertEqual(0, len(node))
1672
def test_map_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
node.map(None, (b"foo bar", b"baz"), b"baz quux")
1676
self.assertEqual(b'foo bar\x00baz', node._search_prefix)
1677
self.assertEqual(b'foo bar\x00baz', node._common_serialised_prefix)
1678
node.map(None, (b"foo bar", b"bing"), b"baz quux")
1679
self.assertEqual(b'foo bar\x00b', node._search_prefix)
1680
self.assertEqual(b'foo bar\x00b', node._common_serialised_prefix)
1681
node.map(None, (b"fool", b"baby"), b"baz quux")
1682
self.assertEqual(b'foo', node._search_prefix)
1683
self.assertEqual(b'foo', node._common_serialised_prefix)
1684
node.map(None, (b"foo bar", b"baz"), b"replaced")
1685
self.assertEqual(b'foo', node._search_prefix)
1686
self.assertEqual(b'foo', node._common_serialised_prefix)
1687
node.map(None, (b"very", b"different"), b"value")
1688
self.assertEqual(b'', node._search_prefix)
1689
self.assertEqual(b'', node._common_serialised_prefix)
1691
def test_unmap_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
node.map(None, (b"foo bar", b"baz"), b"baz quux")
1695
node.map(None, (b"foo bar", b"bing"), b"baz quux")
1696
node.map(None, (b"fool", b"baby"), b"baz quux")
1697
node.map(None, (b"very", b"different"), b"value")
1698
self.assertEqual(b'', node._search_prefix)
1699
self.assertEqual(b'', node._common_serialised_prefix)
1700
node.unmap(None, (b"very", b"different"))
1701
self.assertEqual(b"foo", node._search_prefix)
1702
self.assertEqual(b"foo", node._common_serialised_prefix)
1703
node.unmap(None, (b"fool", b"baby"))
1704
self.assertEqual(b'foo bar\x00b', node._search_prefix)
1705
self.assertEqual(b'foo bar\x00b', node._common_serialised_prefix)
1706
node.unmap(None, (b"foo bar", b"baz"))
1707
self.assertEqual(b'foo bar\x00bing', node._search_prefix)
1708
self.assertEqual(b'foo bar\x00bing', node._common_serialised_prefix)
1709
node.unmap(None, (b"foo bar", b"bing"))
1710
self.assertEqual(None, node._search_prefix)
1711
self.assertEqual(None, node._common_serialised_prefix)
1714
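

# Illustrative sketch, not part of the test suite: the prefixes asserted in
# the two tests above are simply the longest common prefix of the keys'
# serialised forms, where a multi-element key is joined with NUL (b'\x00')
# separators.  The helper name below is an invention for this sketch, not
# chk_map API; with the plain search-key function the search prefix and the
# common serialised prefix coincide.
def _expected_common_serialised_prefix(keys):
    import os.path
    serialised = [b'\x00'.join(key) for key in keys]
    return os.path.commonprefix(serialised)

# For example:
#   _expected_common_serialised_prefix(
#       [(b'foo bar', b'baz'), (b'foo bar', b'bing')]) == b'foo bar\x00b'
#   _expected_common_serialised_prefix(
#       [(b'foo bar', b'baz'), (b'fool', b'baby')]) == b'foo'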


class TestInternalNode(TestCaseWithStore):

    def test_add_node_empty_new(self):
        node = InternalNode(b'fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"foo", child)
        # Note that node isn't strictly valid now as a tree (only one child),
        # but that's OK for this test.
        # The first child defines the node's width:
        self.assertEqual(3, node._node_width)
        # We should be able to iterate over the contents without doing IO.
        self.assertEqual({(b'foo',): b'bar'}, self.to_dict(node, None))
        # The length should be known:
        self.assertEqual(1, len(node))
        # serialising the node should serialise the child and the node.
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        child_key = child.serialise(chk_bytes)[0]
        self.assertEqual(
            [child_key, (b'sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
            keys)
        # We should be able to access deserialised content.
        bytes = self.read_bytes(chk_bytes, keys[1])
        node = chk_map._deserialise(bytes, keys[1], None)
        self.assertEqual(1, len(node))
        self.assertEqual({(b'foo',): b'bar'}, self.to_dict(node, chk_bytes))
        self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode(b'fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node(b"fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"baz")
        node.add_node(b"b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"baz")
        node.add_node(b"b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = ((b'foo',), (b'bar',), (b'cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # narrowed down to exactly that one.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"val")
        child.map(None, (b"fob",), b"val")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"val")
        child.map(None, (b"baz",), b"val")
        node.add_node(b"b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # filtered out without being assigned to either child.
        key_filter = ((b'foo',), (b'fob',), (b'bar',), (b'baz',), (b'ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both retained.
            self.assertEqual(2, len(node_key_filter))
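
    # Illustrative sketch of the behaviour the key_filter tests above rely on.
    # This is an assumption about observable behaviour, not the real
    # _iter_nodes implementation: filter keys are effectively bucketed by the
    # child prefix their serialised form starts with, and children whose
    # bucket is empty (like 'cat' or 'ram' above) are never yielded.  Filter
    # keys shorter than the node width are a further wrinkle handled via
    # _search_prefix_filter and ignored here.
    def _sketch_bucket_key_filter(self, prefixes, key_filter):
        buckets = {}
        for key in key_filter:
            serialised = b'\x00'.join(key)
            for prefix in prefixes:
                if serialised.startswith(prefix):
                    buckets.setdefault(prefix, []).append(key)
        return buckets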

    def make_fo_fa_node(self):
        node = InternalNode(b'f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"val")
        child.map(None, (b"fob",), b"val")
        node.add_node(b'fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"far",), b"val")
        child.map(None, (b"faz",), b"val")
        node.add_node(b"fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo', b'bar'), (b'foo',), (b'fo',), (b'b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove((b'b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo', b'bar'), (b'foo',), (b'fo',), (b'f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [(b'foo',), (b'faa',), (b'baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, (b'foo bar',), b'quux')
        leaf2 = LeafNode()
        leaf2.map(None, (b'strange',), b'beast')
        node.add_node(b"f", leaf1)
        node.add_node(b"s", leaf2)
        self.assertEqual([((b'foo bar',), b'quux'), ((b'strange',), b'beast')],
                         sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, (b'foo bar',), b'quux')
        leaf2 = LeafNode()
        leaf2.map(None, (b'strange',), b'beast')
        node.add_node(b"f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items[b'f'] = None
        node.add_node(b"s", leaf2)
        self.assertEqual([((b'strange',), b'beast')],
                         sorted(node.iteritems(None, [(b'strange',), (b'weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get(b'hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, StaticTuple(b'foo bar',), b'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, StaticTuple(b'strange',), b'beast')
        self.assertEqual(b'\xbeF\x014', search_key_func(StaticTuple(b'foo bar',)))
        self.assertEqual(b'\x85\xfa\xf7K', search_key_func(StaticTuple(b'strange',)))
        node.add_node(b"\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items[b'\xbe'] = None
        node.add_node(b"\x85", leaf2)
        self.assertEqual([((b'strange',), b'beast')],
                         sorted(node.iteritems(None, [StaticTuple(b'strange',),
                                                      StaticTuple(b'weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([(b'missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({(b'k1',): b'foo', (b'k2',): b'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
                         len([value for value in node._items.values()
                              if isinstance(value, StaticTuple)]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, (b'k3',), b'quux')
        self.assertEqual(b"k", prefix)
        self.assertEqual([(b"", node)], nodes)
        # check new child details
        child = node._items[b'k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({(b'k3',): b'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k2',): b'bar', (b'k3',): b'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)
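
    # Usage sketch, not used by the tests: the node-level flow verified above
    # is what CHKMap drives for callers.  Using only the CHKMap calls already
    # exercised in this module, an incremental update looks like this; pages
    # that did not change keep their existing keys in the store, so only the
    # new leaf and the nodes above it need to be written.
    def _sketch_incremental_update(self, store):
        root1 = CHKMap.from_dict(store, {(b'k1',): b'foo', (b'k2',): b'bar'},
                                 maximum_size=10)
        chkmap = CHKMap(store, root1)
        chkmap.map((b'k3',), b'quux')
        root2 = chkmap._save()
        return root1, root2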

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({(b'k1',): b'foo', (b'k22',): b'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
                         len([value for value in node._items.values()
                              if isinstance(value, StaticTuple)]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, (b'k23',), b'quux')
        self.assertEqual(b"k", prefix)
        self.assertEqual([(b"", node)], nodes)
        # check new child details
        child = node._items[b'k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({(b'k22',): b'bar', (b'k23',): b'quux'},
                         self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting with only one
        # key leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items[b'k22']._key
        k23_key = child._items[b'k23']._key
        self.assertEqual({k22_key, k23_key, child_key, node.key()}, set(keys))
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get(b'hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual(b'E8B7BE43\x0071BEEFF9', search_key_func(
            StaticTuple(b'a', b'b')))
        self.assertEqual(b'E8B7', node._search_prefix_filter(
            StaticTuple(b'a', b'b')))
        self.assertEqual(b'E8B7', node._search_prefix_filter(
            StaticTuple(b'a',)))
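
    # Sketch of what the 16-way search key appears to amount to, judging from
    # the constants asserted above (an assumption about the hash formulation,
    # not a statement of the chk_map implementation): each key element becomes
    # its zero-padded upper-case CRC32, and elements are joined with NUL
    # bytes, e.g. (b'a', b'b') -> b'E8B7BE43\x0071BEEFF9'.
    def _sketch_search_key_16(self, key):
        import zlib
        return b'\x00'.join(
            b'%08X' % (zlib.crc32(element) & 0xFFFFFFFF) for element in key)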

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, (b'k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items[b'k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({(b'k22',): b'bar'},
                         self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k22',): b'bar'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items[b'k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, (b'k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap((b'k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())


# map -> fits - done
# map -> doesn't fit - shrink from left till fits
#   key data to return: the common prefix, new nodes.
# unmap -> how to tell if siblings can be combined.
#   combining leaf nodes means expanding the prefix to the left; so gather the size of
#   all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
#   is an internal node, we know that that is a dense subtree - can't combine.
#   otherwise as soon as the sum of serialised values exceeds the split threshold
#   we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count
# variable length prefixes? -> later; start with fixed width to get something going
# map -> fits - update pointer to leaf
#   return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
#   create internal nodes for all the partitions, return list of unique
#   prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count
# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
#   code).
# map inits as empty leafnode.
# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA
# AB - split, but we want to end up with AB, BA, in one node, with
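

# Illustrative sketch of the driver logic the notes above describe; this is an
# assumption about how a caller ties the pieces together, not the real CHKMap
# code.  It uses only the LeafNode/InternalNode calls exercised by the tests
# in this module: when map() reports a split (more than one returned node),
# the caller wraps the pieces in a new InternalNode keyed by their prefixes.
def _sketch_map_and_maybe_split(store, node, key, value):
    prefix, nodes = node.map(store, key, value)
    if len(nodes) == 1:
        # fits - keep using the single returned node
        return nodes[0][1]
    # doesn't fit - create an internal node over the split-out partitions
    internal = InternalNode(prefix)
    for split_prefix, split_node in nodes:
        internal.add_node(split_prefix, split_node)
    return internal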


class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
                                        new_roots, old_roots, search_key_func)

    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map((b'aaa',), b'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual({key1}, diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map((b'aaa',), b'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # maps.
        self.assertEqual([((b'aaa',), b'new aaa content')],
                         diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'abb',), b'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides, both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        key1_c = c_map._root_node._items[b'c'].key()
        c_map.map((b'abb',), b'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        key2_c = c_map._root_node._items[b'c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map((b'ccc',), b'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        key3_c = c_map._root_node._items[b'c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or key3_a
        self.assertEqual({key2_a, key3_c}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual({key1_a, key1_c}, set(diff._old_queue))

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items[b'a'].key()
        key1_c = c_map._root_node._items[b'c'].key()
        key1_d = c_map._root_node._items[b'd'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items[b'aa'].key()
        key2_ad = c_map2._root_node._items[b'ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present in key2.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual({key2_aa, key2_ad}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual({key2_aa, key2_ad}, set(diff._old_queue))
        self.assertEqual({key1_a, key1_c, key1_d}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items[b'2'].key()
        key1_4 = c_map._root_node._items[b'4'].key()
        key1_C = c_map._root_node._items[b'C'].key()
        key1_F = c_map._root_node._items[b'F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items[b'F0'].key()
        key2_F3 = c_map2._root_node._items[b'F3'].key()
        key2_F4 = c_map2._root_node._items[b'F4'].key()
        key2_FD = c_map2._root_node._items[b'FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items[b'aa'].key()
        key1_ad = c_map._root_node._items[b'ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items[b'a'].key()
        key2_b = c_map2._root_node._items[b'b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        #       use. The reason is that 'a' was matched exactly to 'ad' (by sha
        #       sum). However, the code gets complicated in the case of more
        #       than one interesting key, so for now, we live with this.
        #       Consider revising, though benchmarking showing it to be a
        #       real-world issue should be done first.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items[b'a'].key()
        c_map2 = self.get_map({
            (b'acc',): b'initial acc content',
            (b'ace',): b'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that we
        # can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual({((b'acc',), b'initial acc content'),
                          ((b'ace',), b'initial ace content'),
                          }, set(diff._new_item_queue))

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items[b'c'].key()
        key2_d = c_map._root_node._items[b'd'].key()
        c_map.map((b'ccc',), b'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items[b'c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue 1 time
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into the interesting queue,
        # and then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'ccc',), b'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'add',), b'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical in key1 and key2
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        key1_aa = c_map._root_node._items[b'a']._items[b'aa'].key()
        key1_ab = c_map._root_node._items[b'a']._items[b'ab'].key()
        key1_ac = c_map._root_node._items[b'a']._items[b'ac'].key()
        key1_ad = c_map._root_node._items[b'a']._items[b'ad'].key()
        c_map.map((b'aaa',), b'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        key2_aa = c_map._root_node._items[b'a']._items[b'aa'].key()
        c_map.map((b'acc',), b'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        key3_ac = c_map._root_node._items[b'a']._items[b'ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))
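

# Connecting sketch: the CHKMapDifference queues verified above ultimately
# drive the public iter_interesting_nodes() API that the next class tests.
# The helper below is illustrative only (its name is an invention for this
# sketch); it collects the chk pages and the newly introduced key/value items
# that differ between a set of new root keys and a set of old root keys.
def _collect_difference(store, new_root_keys, old_root_keys):
    record_keys = []
    new_items = []
    for record, items in chk_map.iter_interesting_nodes(
            store, new_root_keys, old_root_keys):
        if record is not None:
            record_keys.append(record.key)
        new_items.extend(items)
    return record_keys, new_items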


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))

    def test_empty_to_one_keys(self):
        target = self.get_map_key({(b'a',): b'content'})
        self.assertIterInteresting([target],
                                   [((b'a',), b'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({(b'a',): b'content'})
        self.assertIterInteresting([target],
                                   [((b'a',), b'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({(b'a',): b'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({(b'a',): b'content',
                                  (b'b',): b'content',
                                  (b'c',): b'content',
                                  })
        target = self.get_map_key({(b'a',): b'content',
                                   (b'b',): b'other content',
                                   (b'c',): b'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items[b'b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [((b'b',), b'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({(b'aaa',): b'common',
                                  (b'c',): b'common',
                                  })
        target = self.get_map_key({(b'aaa',): b'common',
                                   (b'aab',): b'new',
                                   (b'c',): b'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items[b'a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items[b'a']._items[b'aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [((b'aab',), b'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({(b'aaa',): b'common'})
        target2 = self.get_map_key({(b'aaa',): b'common',
                                    (b'bbb',): b'new',
                                    })
        target3 = self.get_map_key({(b'aaa',): b'common',
                                    (b'aac',): b'other',
                                    (b'bbb',): b'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items[b'b'].key()
        a_key = target3_map._root_node._items[b'a'].key()
        aac_key = target3_map._root_node._items[b'a']._items[b'aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [((b'aaa',), b'common'), ((b'bbb',), b'new'), ((b'aac',), b'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [((b'bbb',), b'new'), ((b'aac',), b'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later on.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({(b'aaa',): b'common',
                                   (b'aab',): b'basis1',
                                   })
        basis2 = self.get_map_key({(b'bbb',): b'common',
                                   (b'bbc',): b'basis2',
                                   })
        target1 = self.get_map_key({(b'aaa',): b'common',
                                    (b'aac',): b'target1',
                                    (b'bbb',): b'common',
                                    })
        target2 = self.get_map_key({(b'aaa',): b'common',
                                    (b'bba',): b'target2',
                                    (b'bbb',): b'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items[b'a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items[b'a']._items[b'aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items[b'b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items[b'b']._items[b'bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [((b'aac',), b'target1'), ((b'bba',), b'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration, it
        # is still scanned for old refs and items, because it is not truly
        # new. This requires 2 levels of InternalNodes to expose, because of
        # the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefit from
        # simplification.
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable inside
        #    that subtree.
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            (b'aaa',): b'left',
            (b'abb',): b'right',
            # Forces an internalNode at 'a'
            (b'ccc',): b'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            (b'aaa',): b'left',
            (b'abb',): b'right',
            (b'ccc',): b'common',
            # And a new top level node so the root key is different
            (b'ddd',): b'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            (b'abb',): b'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items[b'd'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.
        self.assertIterInteresting(
            [right, left, l_d_key],
            [((b'ddd',), b'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node.
        basis = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'will change left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'will change right',
            })
        left = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'changed left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'will change right',
            })
        right = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'will change left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'changed right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items[b'a'].key()
        l_c_key = left_map._root_node._items[b'c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items[b'a'].key()
        r_c_key = right_map._root_node._items[b'c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [((b'abb',), b'changed left'), ((b'cbb',), b'changed right')],
            [left, right], [basis])