1
# Copyright (C) 2008-2011, 2016 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tests for maps built on a CHK versionedfiles facility."""
28
from ..bzr.chk_map import (
34
from ..static_tuple import StaticTuple
37
class TestNode(tests.TestCase):
39
def assertCommonPrefix(self, expected_common, prefix, key):
40
common = Node.common_prefix(prefix, key)
41
self.assertTrue(len(common) <= len(prefix))
42
self.assertTrue(len(common) <= len(key))
43
self.assertStartsWith(prefix, common)
44
self.assertStartsWith(key, common)
45
self.assertEqual(expected_common, common)
47
def test_common_prefix(self):
48
self.assertCommonPrefix(b'beg', b'beg', b'begin')
50
def test_no_common_prefix(self):
51
self.assertCommonPrefix(b'', b'begin', b'end')
54
self.assertCommonPrefix(b'begin', b'begin', b'begin')
56
def test_not_a_prefix(self):
57
self.assertCommonPrefix(b'b', b'begin', b'b')
60
self.assertCommonPrefix(b'', b'', b'end')
61
self.assertCommonPrefix(b'', b'begin', b'')
62
self.assertCommonPrefix(b'', b'', b'')
65
class TestCaseWithStore(tests.TestCaseWithMemoryTransport):
67
def get_chk_bytes(self):
68
# This creates a standalone CHK store.
69
factory = groupcompress.make_pack_factory(False, False, 1)
70
self.chk_bytes = factory(self.get_transport())
73
def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
74
search_key_func=None):
76
chk_bytes = self.get_chk_bytes()
77
root_key = CHKMap.from_dict(chk_bytes, a_dict,
78
maximum_size=maximum_size, key_width=key_width,
79
search_key_func=search_key_func)
80
root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
81
maximum_size=maximum_size, key_width=key_width,
82
search_key_func=search_key_func)
83
self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
84
" match CHKMap._create_via_map")
85
chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
88
def read_bytes(self, chk_bytes, key):
89
stream = chk_bytes.get_record_stream([key], 'unordered', True)
91
if record.storage_kind == 'absent':
92
self.fail('Store does not contain the key %s' % (key,))
93
return record.get_bytes_as("fulltext")
95
def to_dict(self, node, *args):
96
return dict(node.iteritems(*args))
99
class TestCaseWithExampleMaps(TestCaseWithStore):
101
def get_chk_bytes(self):
102
if getattr(self, '_chk_bytes', None) is None:
103
self._chk_bytes = super(TestCaseWithExampleMaps,
104
self).get_chk_bytes()
105
return self._chk_bytes
107
def get_map(self, a_dict, maximum_size=100, search_key_func=None):
108
c_map = self._get_map(a_dict, maximum_size=maximum_size,
109
chk_bytes=self.get_chk_bytes(),
110
search_key_func=search_key_func)
113
def make_root_only_map(self, search_key_func=None):
114
return self.get_map({
115
(b'aaa',): b'initial aaa content',
116
(b'abb',): b'initial abb content',
117
}, search_key_func=search_key_func)
119
def make_root_only_aaa_ddd_map(self, search_key_func=None):
120
return self.get_map({
121
(b'aaa',): b'initial aaa content',
122
(b'ddd',): b'initial ddd content',
123
}, search_key_func=search_key_func)
125
def make_one_deep_map(self, search_key_func=None):
126
# Same as root_only_map, except it forces an InternalNode at the root
127
return self.get_map({
128
(b'aaa',): b'initial aaa content',
129
(b'abb',): b'initial abb content',
130
(b'ccc',): b'initial ccc content',
131
(b'ddd',): b'initial ddd content',
132
}, search_key_func=search_key_func)
134
def make_two_deep_map(self, search_key_func=None):
135
# Carefully chosen so that it creates a 2-deep map for both
136
# _search_key_plain and for _search_key_16
137
# Also so that things line up with make_one_deep_two_prefix_map
138
return self.get_map({
139
(b'aaa',): b'initial aaa content',
140
(b'abb',): b'initial abb content',
141
(b'acc',): b'initial acc content',
142
(b'ace',): b'initial ace content',
143
(b'add',): b'initial add content',
144
(b'adh',): b'initial adh content',
145
(b'adl',): b'initial adl content',
146
(b'ccc',): b'initial ccc content',
147
(b'ddd',): b'initial ddd content',
148
}, search_key_func=search_key_func)
150
def make_one_deep_two_prefix_map(self, search_key_func=None):
151
"""Create a map with one internal node, but references are extra long.
153
Otherwise has similar content to make_two_deep_map.
155
return self.get_map({
156
(b'aaa',): b'initial aaa content',
157
(b'add',): b'initial add content',
158
(b'adh',): b'initial adh content',
159
(b'adl',): b'initial adl content',
160
}, search_key_func=search_key_func)
162
def make_one_deep_one_prefix_map(self, search_key_func=None):
163
"""Create a map with one internal node, but references are extra long.
165
Similar to make_one_deep_two_prefix_map, except the split is at the
166
first char, rather than the second.
168
return self.get_map({
169
(b'add',): b'initial add content',
170
(b'adh',): b'initial adh content',
171
(b'adl',): b'initial adl content',
172
(b'bbb',): b'initial bbb content',
173
}, search_key_func=search_key_func)
176
class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
177
"""Actual tests for the provided examples."""
179
def test_root_only_map_plain(self):
180
c_map = self.make_root_only_map()
181
self.assertEqualDiff(
183
" ('aaa',) 'initial aaa content'\n"
184
" ('abb',) 'initial abb content'\n",
187
def test_root_only_map_16(self):
188
c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
189
self.assertEqualDiff(
191
" ('aaa',) 'initial aaa content'\n"
192
" ('abb',) 'initial abb content'\n",
195
def test_one_deep_map_plain(self):
196
c_map = self.make_one_deep_map()
197
self.assertEqualDiff(
200
" ('aaa',) 'initial aaa content'\n"
201
" ('abb',) 'initial abb content'\n"
203
" ('ccc',) 'initial ccc content'\n"
205
" ('ddd',) 'initial ddd content'\n",
208
def test_one_deep_map_16(self):
209
c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
210
self.assertEqualDiff(
213
" ('ccc',) 'initial ccc content'\n"
215
" ('abb',) 'initial abb content'\n"
217
" ('aaa',) 'initial aaa content'\n"
218
" ('ddd',) 'initial ddd content'\n",
221
def test_root_only_aaa_ddd_plain(self):
222
c_map = self.make_root_only_aaa_ddd_map()
223
self.assertEqualDiff(
225
" ('aaa',) 'initial aaa content'\n"
226
" ('ddd',) 'initial ddd content'\n",
229
def test_root_only_aaa_ddd_16(self):
230
c_map = self.make_root_only_aaa_ddd_map(
231
search_key_func=chk_map._search_key_16)
232
# We use 'aaa' and 'ddd' because they happen to map to 'F' when using
234
self.assertEqualDiff(
236
" ('aaa',) 'initial aaa content'\n"
237
" ('ddd',) 'initial ddd content'\n",
240
def test_two_deep_map_plain(self):
241
c_map = self.make_two_deep_map()
242
self.assertEqualDiff(
244
" 'a' InternalNode\n"
246
" ('aaa',) 'initial aaa content'\n"
248
" ('abb',) 'initial abb content'\n"
250
" ('acc',) 'initial acc content'\n"
251
" ('ace',) 'initial ace content'\n"
253
" ('add',) 'initial add content'\n"
254
" ('adh',) 'initial adh content'\n"
255
" ('adl',) 'initial adl content'\n"
257
" ('ccc',) 'initial ccc content'\n"
259
" ('ddd',) 'initial ddd content'\n",
262
def test_two_deep_map_16(self):
263
c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
264
self.assertEqualDiff(
267
" ('acc',) 'initial acc content'\n"
268
" ('ccc',) 'initial ccc content'\n"
270
" ('abb',) 'initial abb content'\n"
272
" ('ace',) 'initial ace content'\n"
273
" 'F' InternalNode\n"
275
" ('aaa',) 'initial aaa content'\n"
277
" ('adl',) 'initial adl content'\n"
279
" ('adh',) 'initial adh content'\n"
281
" ('ddd',) 'initial ddd content'\n"
283
" ('add',) 'initial add content'\n",
286
def test_one_deep_two_prefix_map_plain(self):
287
c_map = self.make_one_deep_two_prefix_map()
288
self.assertEqualDiff(
291
" ('aaa',) 'initial aaa content'\n"
293
" ('add',) 'initial add content'\n"
294
" ('adh',) 'initial adh content'\n"
295
" ('adl',) 'initial adl content'\n",
298
def test_one_deep_two_prefix_map_16(self):
299
c_map = self.make_one_deep_two_prefix_map(
300
search_key_func=chk_map._search_key_16)
301
self.assertEqualDiff(
304
" ('aaa',) 'initial aaa content'\n"
306
" ('adl',) 'initial adl content'\n"
308
" ('adh',) 'initial adh content'\n"
310
" ('add',) 'initial add content'\n",
313
def test_one_deep_one_prefix_map_plain(self):
314
c_map = self.make_one_deep_one_prefix_map()
315
self.assertEqualDiff(
318
" ('add',) 'initial add content'\n"
319
" ('adh',) 'initial adh content'\n"
320
" ('adl',) 'initial adl content'\n"
322
" ('bbb',) 'initial bbb content'\n",
325
def test_one_deep_one_prefix_map_16(self):
326
c_map = self.make_one_deep_one_prefix_map(
327
search_key_func=chk_map._search_key_16)
328
self.assertEqualDiff(
331
" ('bbb',) 'initial bbb content'\n"
333
" ('add',) 'initial add content'\n"
334
" ('adh',) 'initial adh content'\n"
335
" ('adl',) 'initial adl content'\n",
339
class TestMap(TestCaseWithStore):
341
def assertHasABMap(self, chk_bytes):
342
ab_leaf_bytes = b'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
343
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
344
self.assertEqual(b'90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
345
root_key = (b'sha1:' + ab_sha1,)
346
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
349
def assertHasEmptyMap(self, chk_bytes):
350
empty_leaf_bytes = b'chkleaf:\n0\n1\n0\n\n'
351
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
353
b'8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
354
root_key = (b'sha1:' + empty_sha1,)
355
self.assertEqual(empty_leaf_bytes,
356
self.read_bytes(chk_bytes, root_key))
359
def assertMapLayoutEqual(self, map_one, map_two):
360
"""Assert that the internal structure is identical between the maps."""
361
map_one._ensure_root()
362
node_one_stack = [map_one._root_node]
363
map_two._ensure_root()
364
node_two_stack = [map_two._root_node]
365
while node_one_stack:
366
node_one = node_one_stack.pop()
367
node_two = node_two_stack.pop()
368
if node_one.__class__ != node_two.__class__:
369
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
370
map_two._dump_tree(include_keys=True))
371
self.assertEqual(node_one._search_prefix,
372
node_two._search_prefix)
373
if isinstance(node_one, InternalNode):
374
# Internal nodes must have identical references
375
self.assertEqual(sorted(node_one._items.keys()),
376
sorted(node_two._items.keys()))
377
node_one_stack.extend(sorted(
378
[n for n, _ in node_one._iter_nodes(map_one._store)],
379
key=lambda a: a._search_prefix))
380
node_two_stack.extend(sorted(
381
[n for n, _ in node_two._iter_nodes(map_two._store)],
382
key=lambda a: a._search_prefix))
384
# Leaf nodes must have identical contents
385
self.assertEqual(node_one._items, node_two._items)
386
self.assertEqual([], node_two_stack)
388
def assertCanonicalForm(self, chkmap):
389
"""Assert that the chkmap is in 'canonical' form.
391
We do this by adding all of the key value pairs from scratch, both in
392
forward order and reverse order, and assert that the final tree layout
395
items = list(chkmap.iteritems())
396
map_forward = chk_map.CHKMap(None, None)
397
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
398
for key, value in items:
399
map_forward.map(key, value)
400
self.assertMapLayoutEqual(map_forward, chkmap)
401
map_reverse = chk_map.CHKMap(None, None)
402
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
403
for key, value in reversed(items):
404
map_reverse.map(key, value)
405
self.assertMapLayoutEqual(map_reverse, chkmap)
407
def test_assert_map_layout_equal(self):
408
store = self.get_chk_bytes()
409
map_one = CHKMap(store, None)
410
map_one._root_node.set_maximum_size(20)
411
map_two = CHKMap(store, None)
412
map_two._root_node.set_maximum_size(20)
413
self.assertMapLayoutEqual(map_one, map_two)
414
map_one.map((b'aaa', ), b'value')
415
self.assertRaises(AssertionError,
416
self.assertMapLayoutEqual, map_one, map_two)
417
map_two.map((b'aaa', ), b'value')
418
self.assertMapLayoutEqual(map_one, map_two)
419
# Split the tree, so we ensure that internal nodes and leaf nodes are
421
map_one.map((b'aab', ), b'value')
422
self.assertIsInstance(map_one._root_node, InternalNode)
423
self.assertRaises(AssertionError,
424
self.assertMapLayoutEqual, map_one, map_two)
425
map_two.map((b'aab', ), b'value')
426
self.assertMapLayoutEqual(map_one, map_two)
427
map_one.map((b'aac', ), b'value')
428
self.assertRaises(AssertionError,
429
self.assertMapLayoutEqual, map_one, map_two)
430
self.assertCanonicalForm(map_one)
432
def test_from_dict_empty(self):
433
chk_bytes = self.get_chk_bytes()
434
root_key = CHKMap.from_dict(chk_bytes, {})
435
# Check the data was saved and inserted correctly.
436
expected_root_key = self.assertHasEmptyMap(chk_bytes)
437
self.assertEqual(expected_root_key, root_key)
439
def test_from_dict_ab(self):
440
chk_bytes = self.get_chk_bytes()
441
root_key = CHKMap.from_dict(chk_bytes, {(b"a", ): b"b"})
442
# Check the data was saved and inserted correctly.
443
expected_root_key = self.assertHasABMap(chk_bytes)
444
self.assertEqual(expected_root_key, root_key)
446
def test_apply_empty_ab(self):
447
# applying a delta (None, "a", "b") to an empty chkmap generates the
448
# same map as from_dict_ab.
449
chk_bytes = self.get_chk_bytes()
450
root_key = CHKMap.from_dict(chk_bytes, {})
451
chkmap = CHKMap(chk_bytes, root_key)
452
new_root = chkmap.apply_delta([(None, (b"a", ), b"b")])
453
# Check the data was saved and inserted correctly.
454
expected_root_key = self.assertHasABMap(chk_bytes)
455
self.assertEqual(expected_root_key, new_root)
456
# The update should have left us with an in memory root node, with an
458
self.assertEqual(new_root, chkmap._root_node._key)
460
def test_apply_ab_empty(self):
461
# applying a delta ("a", None, None) to a map with 'a' in it generates
463
chk_bytes = self.get_chk_bytes()
464
root_key = CHKMap.from_dict(chk_bytes, {(b"a",): b"b"})
465
chkmap = CHKMap(chk_bytes, root_key)
466
new_root = chkmap.apply_delta([((b"a",), None, None)])
467
# Check the data was saved and inserted correctly.
468
expected_root_key = self.assertHasEmptyMap(chk_bytes)
469
self.assertEqual(expected_root_key, new_root)
470
# The update should have left us with an in memory root node, with an
472
self.assertEqual(new_root, chkmap._root_node._key)
474
def test_apply_delete_to_internal_node(self):
475
# applying a delta should be convert an internal root node to a leaf
476
# node if the delta shrinks the map enough.
477
store = self.get_chk_bytes()
478
chkmap = CHKMap(store, None)
479
# Add three items: 2 small enough to fit in one node, and one huge to
480
# force multiple nodes.
481
chkmap._root_node.set_maximum_size(100)
482
chkmap.map((b'small',), b'value')
483
chkmap.map((b'little',), b'value')
484
chkmap.map((b'very-big',), b'x' * 100)
485
# (Check that we have constructed the scenario we want to test)
486
self.assertIsInstance(chkmap._root_node, InternalNode)
487
# Delete the huge item so that the map fits in one node again.
488
delta = [((b'very-big',), None, None)]
489
chkmap.apply_delta(delta)
490
self.assertCanonicalForm(chkmap)
491
self.assertIsInstance(chkmap._root_node, LeafNode)
493
def test_apply_new_keys_must_be_new(self):
494
# applying a delta (None, "a", "b") to a map with 'a' in it generates
496
chk_bytes = self.get_chk_bytes()
497
root_key = CHKMap.from_dict(chk_bytes, {(b"a",): b"b"})
498
chkmap = CHKMap(chk_bytes, root_key)
499
self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
500
[(None, (b"a",), b"b")])
501
# As an error occured, the update should have left us without changing
502
# anything (the root should be unchanged).
503
self.assertEqual(root_key, chkmap._root_node._key)
505
def test_apply_delta_is_deterministic(self):
506
chk_bytes = self.get_chk_bytes()
507
chkmap1 = CHKMap(chk_bytes, None)
508
chkmap1._root_node.set_maximum_size(10)
509
chkmap1.apply_delta([(None, (b'aaa',), b'common'),
510
(None, (b'bba',), b'target2'),
511
(None, (b'bbb',), b'common')])
512
root_key1 = chkmap1._save()
513
self.assertCanonicalForm(chkmap1)
515
chkmap2 = CHKMap(chk_bytes, None)
516
chkmap2._root_node.set_maximum_size(10)
517
chkmap2.apply_delta([(None, (b'bbb',), b'common'),
518
(None, (b'bba',), b'target2'),
519
(None, (b'aaa',), b'common')])
520
root_key2 = chkmap2._save()
521
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
522
chkmap2._dump_tree(include_keys=True))
523
self.assertEqual(root_key1, root_key2)
524
self.assertCanonicalForm(chkmap2)
526
def test_stable_splitting(self):
527
store = self.get_chk_bytes()
528
chkmap = CHKMap(store, None)
529
# Should fit 2 keys per LeafNode
530
chkmap._root_node.set_maximum_size(35)
531
chkmap.map((b'aaa',), b'v')
532
self.assertEqualDiff("'' LeafNode\n"
535
chkmap.map((b'aab',), b'v')
536
self.assertEqualDiff("'' LeafNode\n"
540
self.assertCanonicalForm(chkmap)
542
# Creates a new internal node, and splits the others into leaves
543
chkmap.map((b'aac',), b'v')
544
self.assertEqualDiff("'' InternalNode\n"
552
self.assertCanonicalForm(chkmap)
554
# Splits again, because it can't fit in the current structure
555
chkmap.map((b'bbb',), b'v')
556
self.assertEqualDiff("'' InternalNode\n"
557
" 'a' InternalNode\n"
567
self.assertCanonicalForm(chkmap)
569
def test_map_splits_with_longer_key(self):
570
store = self.get_chk_bytes()
571
chkmap = CHKMap(store, None)
572
# Should fit 1 key per LeafNode
573
chkmap._root_node.set_maximum_size(10)
574
chkmap.map((b'aaa',), b'v')
575
chkmap.map((b'aaaa',), b'v')
576
self.assertCanonicalForm(chkmap)
577
self.assertIsInstance(chkmap._root_node, InternalNode)
579
def test_with_linefeed_in_key(self):
580
store = self.get_chk_bytes()
581
chkmap = CHKMap(store, None)
582
# Should fit 1 key per LeafNode
583
chkmap._root_node.set_maximum_size(10)
584
chkmap.map((b'a\ra',), b'val1')
585
chkmap.map((b'a\rb',), b'val2')
586
chkmap.map((b'ac',), b'val3')
587
self.assertCanonicalForm(chkmap)
588
self.assertEqualDiff("'' InternalNode\n"
589
" 'a\\r' InternalNode\n"
590
" 'a\\ra' LeafNode\n"
591
" ('a\\ra',) 'val1'\n"
592
" 'a\\rb' LeafNode\n"
593
" ('a\\rb',) 'val2'\n"
597
# We should also successfully serialise and deserialise these items
598
root_key = chkmap._save()
599
chkmap = CHKMap(store, root_key)
600
self.assertEqualDiff("'' InternalNode\n"
601
" 'a\\r' InternalNode\n"
602
" 'a\\ra' LeafNode\n"
603
" ('a\\ra',) 'val1'\n"
604
" 'a\\rb' LeafNode\n"
605
" ('a\\rb',) 'val2'\n"
610
def test_deep_splitting(self):
611
store = self.get_chk_bytes()
612
chkmap = CHKMap(store, None)
613
# Should fit 2 keys per LeafNode
614
chkmap._root_node.set_maximum_size(40)
615
chkmap.map((b'aaaaaaaa',), b'v')
616
chkmap.map((b'aaaaabaa',), b'v')
617
self.assertEqualDiff("'' LeafNode\n"
618
" ('aaaaaaaa',) 'v'\n"
619
" ('aaaaabaa',) 'v'\n",
621
chkmap.map((b'aaabaaaa',), b'v')
622
chkmap.map((b'aaababaa',), b'v')
623
self.assertEqualDiff("'' InternalNode\n"
625
" ('aaaaaaaa',) 'v'\n"
626
" ('aaaaabaa',) 'v'\n"
628
" ('aaabaaaa',) 'v'\n"
629
" ('aaababaa',) 'v'\n",
631
chkmap.map((b'aaabacaa',), b'v')
632
chkmap.map((b'aaabadaa',), b'v')
633
self.assertEqualDiff("'' InternalNode\n"
635
" ('aaaaaaaa',) 'v'\n"
636
" ('aaaaabaa',) 'v'\n"
637
" 'aaab' InternalNode\n"
638
" 'aaabaa' LeafNode\n"
639
" ('aaabaaaa',) 'v'\n"
640
" 'aaabab' LeafNode\n"
641
" ('aaababaa',) 'v'\n"
642
" 'aaabac' LeafNode\n"
643
" ('aaabacaa',) 'v'\n"
644
" 'aaabad' LeafNode\n"
645
" ('aaabadaa',) 'v'\n",
647
chkmap.map((b'aaababba',), b'val')
648
chkmap.map((b'aaababca',), b'val')
649
self.assertEqualDiff("'' InternalNode\n"
651
" ('aaaaaaaa',) 'v'\n"
652
" ('aaaaabaa',) 'v'\n"
653
" 'aaab' InternalNode\n"
654
" 'aaabaa' LeafNode\n"
655
" ('aaabaaaa',) 'v'\n"
656
" 'aaabab' InternalNode\n"
657
" 'aaababa' LeafNode\n"
658
" ('aaababaa',) 'v'\n"
659
" 'aaababb' LeafNode\n"
660
" ('aaababba',) 'val'\n"
661
" 'aaababc' LeafNode\n"
662
" ('aaababca',) 'val'\n"
663
" 'aaabac' LeafNode\n"
664
" ('aaabacaa',) 'v'\n"
665
" 'aaabad' LeafNode\n"
666
" ('aaabadaa',) 'v'\n",
668
# Now we add a node that should fit around an existing InternalNode,
669
# but has a slightly different key prefix, which causes a new
671
chkmap.map((b'aaabDaaa',), b'v')
672
self.assertEqualDiff("'' InternalNode\n"
674
" ('aaaaaaaa',) 'v'\n"
675
" ('aaaaabaa',) 'v'\n"
676
" 'aaab' InternalNode\n"
677
" 'aaabD' LeafNode\n"
678
" ('aaabDaaa',) 'v'\n"
679
" 'aaaba' InternalNode\n"
680
" 'aaabaa' LeafNode\n"
681
" ('aaabaaaa',) 'v'\n"
682
" 'aaabab' InternalNode\n"
683
" 'aaababa' LeafNode\n"
684
" ('aaababaa',) 'v'\n"
685
" 'aaababb' LeafNode\n"
686
" ('aaababba',) 'val'\n"
687
" 'aaababc' LeafNode\n"
688
" ('aaababca',) 'val'\n"
689
" 'aaabac' LeafNode\n"
690
" ('aaabacaa',) 'v'\n"
691
" 'aaabad' LeafNode\n"
692
" ('aaabadaa',) 'v'\n",
695
def test_map_collapses_if_size_changes(self):
696
store = self.get_chk_bytes()
697
chkmap = CHKMap(store, None)
698
# Should fit 2 keys per LeafNode
699
chkmap._root_node.set_maximum_size(35)
700
chkmap.map((b'aaa',), b'v')
701
chkmap.map((b'aab',), b'very long value that splits')
702
self.assertEqualDiff("'' InternalNode\n"
706
" ('aab',) 'very long value that splits'\n",
708
self.assertCanonicalForm(chkmap)
709
# Now changing the value to something small should cause a rebuild
710
chkmap.map((b'aab',), b'v')
711
self.assertEqualDiff("'' LeafNode\n"
715
self.assertCanonicalForm(chkmap)
717
def test_map_double_deep_collapses(self):
718
store = self.get_chk_bytes()
719
chkmap = CHKMap(store, None)
720
# Should fit 3 small keys per LeafNode
721
chkmap._root_node.set_maximum_size(40)
722
chkmap.map((b'aaa',), b'v')
723
chkmap.map((b'aab',), b'very long value that splits')
724
chkmap.map((b'abc',), b'v')
725
self.assertEqualDiff("'' InternalNode\n"
726
" 'aa' InternalNode\n"
730
" ('aab',) 'very long value that splits'\n"
734
chkmap.map((b'aab',), b'v')
735
self.assertCanonicalForm(chkmap)
736
self.assertEqualDiff("'' LeafNode\n"
742
def test_stable_unmap(self):
743
store = self.get_chk_bytes()
744
chkmap = CHKMap(store, None)
745
# Should fit 2 keys per LeafNode
746
chkmap._root_node.set_maximum_size(35)
747
chkmap.map((b'aaa',), b'v')
748
chkmap.map((b'aab',), b'v')
749
self.assertEqualDiff("'' LeafNode\n"
753
# Creates a new internal node, and splits the others into leaves
754
chkmap.map((b'aac',), b'v')
755
self.assertEqualDiff("'' InternalNode\n"
763
self.assertCanonicalForm(chkmap)
764
# Now lets unmap one of the keys, and assert that we collapse the
766
chkmap.unmap((b'aac',))
767
self.assertEqualDiff("'' LeafNode\n"
771
self.assertCanonicalForm(chkmap)
773
def test_unmap_double_deep(self):
774
store = self.get_chk_bytes()
775
chkmap = CHKMap(store, None)
776
# Should fit 3 keys per LeafNode
777
chkmap._root_node.set_maximum_size(40)
778
chkmap.map((b'aaa',), b'v')
779
chkmap.map((b'aaab',), b'v')
780
chkmap.map((b'aab',), b'very long value')
781
chkmap.map((b'abc',), b'v')
782
self.assertEqualDiff("'' InternalNode\n"
783
" 'aa' InternalNode\n"
788
" ('aab',) 'very long value'\n"
792
# Removing the 'aab' key should cause everything to collapse back to a
794
chkmap.unmap((b'aab',))
795
self.assertEqualDiff("'' LeafNode\n"
801
def test_unmap_double_deep_non_empty_leaf(self):
802
store = self.get_chk_bytes()
803
chkmap = CHKMap(store, None)
804
# Should fit 3 keys per LeafNode
805
chkmap._root_node.set_maximum_size(40)
806
chkmap.map((b'aaa',), b'v')
807
chkmap.map((b'aab',), b'long value')
808
chkmap.map((b'aabb',), b'v')
809
chkmap.map((b'abc',), b'v')
810
self.assertEqualDiff("'' InternalNode\n"
811
" 'aa' InternalNode\n"
815
" ('aab',) 'long value'\n"
820
# Removing the 'aab' key should cause everything to collapse back to a
822
chkmap.unmap((b'aab',))
823
self.assertEqualDiff("'' LeafNode\n"
829
def test_unmap_with_known_internal_node_doesnt_page(self):
830
store = self.get_chk_bytes()
831
chkmap = CHKMap(store, None)
832
# Should fit 3 keys per LeafNode
833
chkmap._root_node.set_maximum_size(30)
834
chkmap.map((b'aaa',), b'v')
835
chkmap.map((b'aab',), b'v')
836
chkmap.map((b'aac',), b'v')
837
chkmap.map((b'abc',), b'v')
838
chkmap.map((b'acd',), b'v')
839
self.assertEqualDiff("'' InternalNode\n"
840
" 'aa' InternalNode\n"
852
# Save everything to the map, and start over
853
chkmap = CHKMap(store, chkmap._save())
854
# Mapping an 'aa' key loads the internal node, but should not map the
855
# 'ab' and 'ac' nodes
856
chkmap.map((b'aad',), b'v')
857
self.assertIsInstance(chkmap._root_node._items[b'aa'], InternalNode)
858
self.assertIsInstance(chkmap._root_node._items[b'ab'], StaticTuple)
859
self.assertIsInstance(chkmap._root_node._items[b'ac'], StaticTuple)
860
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
862
chkmap.unmap((b'acd',))
863
self.assertIsInstance(chkmap._root_node._items[b'aa'], InternalNode)
864
self.assertIsInstance(chkmap._root_node._items[b'ab'], StaticTuple)
866
def test_unmap_without_fitting_doesnt_page_in(self):
867
store = self.get_chk_bytes()
868
chkmap = CHKMap(store, None)
869
# Should fit 2 keys per LeafNode
870
chkmap._root_node.set_maximum_size(20)
871
chkmap.map((b'aaa',), b'v')
872
chkmap.map((b'aab',), b'v')
873
self.assertEqualDiff("'' InternalNode\n"
879
# Save everything to the map, and start over
880
chkmap = CHKMap(store, chkmap._save())
881
chkmap.map((b'aac',), b'v')
882
chkmap.map((b'aad',), b'v')
883
chkmap.map((b'aae',), b'v')
884
chkmap.map((b'aaf',), b'v')
885
# At this point, the previous nodes should not be paged in, but the
886
# newly added nodes would be
887
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
888
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
889
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
890
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
891
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
892
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
893
# Now unmapping one of the new nodes will use only the already-paged-in
894
# nodes to determine that we don't need to do more.
895
chkmap.unmap((b'aaf',))
896
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
897
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
898
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
899
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
900
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
902
def test_unmap_pages_in_if_necessary(self):
903
store = self.get_chk_bytes()
904
chkmap = CHKMap(store, None)
905
# Should fit 2 keys per LeafNode
906
chkmap._root_node.set_maximum_size(30)
907
chkmap.map((b'aaa',), b'val')
908
chkmap.map((b'aab',), b'val')
909
chkmap.map((b'aac',), b'val')
910
self.assertEqualDiff("'' InternalNode\n"
918
root_key = chkmap._save()
919
# Save everything to the map, and start over
920
chkmap = CHKMap(store, root_key)
921
chkmap.map((b'aad',), b'v')
922
# At this point, the previous nodes should not be paged in, but the
923
# newly added node would be
924
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
925
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
926
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
927
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
928
# Unmapping the new node will check the existing nodes to see if they
930
# Clear the page cache so we ensure we have to read all the children
931
chk_map.clear_cache()
932
chkmap.unmap((b'aad',))
933
self.assertIsInstance(chkmap._root_node._items[b'aaa'], LeafNode)
934
self.assertIsInstance(chkmap._root_node._items[b'aab'], LeafNode)
935
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
937
def test_unmap_pages_in_from_page_cache(self):
938
store = self.get_chk_bytes()
939
chkmap = CHKMap(store, None)
940
# Should fit 2 keys per LeafNode
941
chkmap._root_node.set_maximum_size(30)
942
chkmap.map((b'aaa',), b'val')
943
chkmap.map((b'aab',), b'val')
944
chkmap.map((b'aac',), b'val')
945
root_key = chkmap._save()
946
# Save everything to the map, and start over
947
chkmap = CHKMap(store, root_key)
948
chkmap.map((b'aad',), b'val')
949
self.assertEqualDiff("'' InternalNode\n"
959
# Save everything to the map, start over after _dump_tree
960
chkmap = CHKMap(store, root_key)
961
chkmap.map((b'aad',), b'v')
962
# At this point, the previous nodes should not be paged in, but the
963
# newly added node would be
964
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
965
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
966
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
967
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
968
# Now clear the page cache, and only include 2 of the children in the
970
aab_key = chkmap._root_node._items[b'aab']
971
aab_bytes = chk_map._get_cache()[aab_key]
972
aac_key = chkmap._root_node._items[b'aac']
973
aac_bytes = chk_map._get_cache()[aac_key]
974
chk_map.clear_cache()
975
chk_map._get_cache()[aab_key] = aab_bytes
976
chk_map._get_cache()[aac_key] = aac_bytes
978
# Unmapping the new node will check the nodes from the page cache
979
# first, and not have to read in 'aaa'
980
chkmap.unmap((b'aad',))
981
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
982
self.assertIsInstance(chkmap._root_node._items[b'aab'], LeafNode)
983
self.assertIsInstance(chkmap._root_node._items[b'aac'], LeafNode)
985
def test_unmap_uses_existing_items(self):
986
store = self.get_chk_bytes()
987
chkmap = CHKMap(store, None)
988
# Should fit 2 keys per LeafNode
989
chkmap._root_node.set_maximum_size(30)
990
chkmap.map((b'aaa',), b'val')
991
chkmap.map((b'aab',), b'val')
992
chkmap.map((b'aac',), b'val')
993
root_key = chkmap._save()
994
# Save everything to the map, and start over
995
chkmap = CHKMap(store, root_key)
996
chkmap.map((b'aad',), b'val')
997
chkmap.map((b'aae',), b'val')
998
chkmap.map((b'aaf',), b'val')
999
# At this point, the previous nodes should not be paged in, but the
1000
# newly added node would be
1001
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
1002
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
1003
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
1004
self.assertIsInstance(chkmap._root_node._items[b'aad'], LeafNode)
1005
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
1006
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
1008
# Unmapping a new node will see the other nodes that are already in
1009
# memory, and not need to page in anything else
1010
chkmap.unmap((b'aad',))
1011
self.assertIsInstance(chkmap._root_node._items[b'aaa'], StaticTuple)
1012
self.assertIsInstance(chkmap._root_node._items[b'aab'], StaticTuple)
1013
self.assertIsInstance(chkmap._root_node._items[b'aac'], StaticTuple)
1014
self.assertIsInstance(chkmap._root_node._items[b'aae'], LeafNode)
1015
self.assertIsInstance(chkmap._root_node._items[b'aaf'], LeafNode)
1017
def test_iter_changes_empty_ab(self):
1018
# Asking for changes between an empty dict to a dict with keys returns
1020
basis = self._get_map({}, maximum_size=10)
1021
target = self._get_map(
1022
{(b'a',): b'content here', (b'b',): b'more content'},
1023
chk_bytes=basis._store, maximum_size=10)
1024
self.assertEqual([((b'a',), None, b'content here'),
1025
((b'b',), None, b'more content')],
1026
sorted(list(target.iter_changes(basis))))
1028
def test_iter_changes_ab_empty(self):
1029
# Asking for changes between a dict with keys to an empty dict returns
1031
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1033
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1034
self.assertEqual([((b'a',), b'content here', None),
1035
((b'b',), b'more content', None)],
1036
sorted(list(target.iter_changes(basis))))
1038
def test_iter_changes_empty_empty_is_empty(self):
1039
basis = self._get_map({}, maximum_size=10)
1040
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1041
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1043
def test_iter_changes_ab_ab_is_empty(self):
1044
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1046
target = self._get_map(
1047
{(b'a',): b'content here', (b'b',): b'more content'},
1048
chk_bytes=basis._store, maximum_size=10)
1049
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1051
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1052
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1054
target = self._get_map(
1055
{(b'a',): b'content here', (b'b',): b'more content'},
1056
chk_bytes=basis._store, maximum_size=10)
1057
list(target.iter_changes(basis))
1058
self.assertIsInstance(target._root_node, StaticTuple)
1059
self.assertIsInstance(basis._root_node, StaticTuple)
1061
def test_iter_changes_ab_ab_changed_values_shown(self):
1062
basis = self._get_map({(b'a',): b'content here', (b'b',): b'more content'},
1064
target = self._get_map(
1065
{(b'a',): b'content here', (b'b',): b'different content'},
1066
chk_bytes=basis._store, maximum_size=10)
1067
result = sorted(list(target.iter_changes(basis)))
1068
self.assertEqual([((b'b',), b'more content', b'different content')],
1071
def test_iter_changes_mixed_node_length(self):
1072
# When one side has different node lengths than the other, common
1073
# but different keys still need to be show, and new-and-old included
1075
# aaa - common unaltered
1076
# aab - common altered
1080
# aaa to be not loaded (later test)
1081
# aab, b, at to be returned.
1082
# basis splits at byte 0,1,2, aaa is commonb is basis only
1083
basis_dict = {(b'aaa',): b'foo bar',
1084
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1085
# target splits at byte 1,2, at is target only
1086
target_dict = {(b'aaa',): b'foo bar',
1087
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1089
((b'aab',), b'common altered a', b'common altered b'),
1090
((b'at',), None, b'foo bar t'),
1091
((b'b',), b'foo bar b', None),
1093
basis = self._get_map(basis_dict, maximum_size=10)
1094
target = self._get_map(target_dict, maximum_size=10,
1095
chk_bytes=basis._store)
1096
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1098
def test_iter_changes_common_pages_not_loaded(self):
1099
# aaa - common unaltered
1100
# aab - common altered
1104
# aaa to be not loaded
1105
# aaa not to be in result.
1106
basis_dict = {(b'aaa',): b'foo bar',
1107
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1108
# target splits at byte 1, at is target only
1109
target_dict = {(b'aaa',): b'foo bar',
1110
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1111
basis = self._get_map(basis_dict, maximum_size=10)
1112
target = self._get_map(target_dict, maximum_size=10,
1113
chk_bytes=basis._store)
1114
basis_get = basis._store.get_record_stream
1116
def get_record_stream(keys, order, fulltext):
1117
if (b'sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1118
raise AssertionError("'aaa' pointer was followed %r" % keys)
1119
return basis_get(keys, order, fulltext)
1120
basis._store.get_record_stream = get_record_stream
1121
result = sorted(list(target.iter_changes(basis)))
1122
for change in result:
1123
if change[0] == (b'aaa',):
1124
self.fail("Found unexpected change: %s" % change)
1126
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
1127
# Within a leaf there are no hash's to exclude keys, make sure multi
1128
# value leaf nodes are handled well.
1129
basis_dict = {(b'aaa',): b'foo bar',
1130
(b'aab',): b'common altered a', (b'b',): b'foo bar b'}
1131
target_dict = {(b'aaa',): b'foo bar',
1132
(b'aab',): b'common altered b', (b'at',): b'foo bar t'}
1134
((b'aab',), b'common altered a', b'common altered b'),
1135
((b'at',), None, b'foo bar t'),
1136
((b'b',), b'foo bar b', None),
1138
basis = self._get_map(basis_dict)
1139
target = self._get_map(target_dict, chk_bytes=basis._store)
1140
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1142
def test_iteritems_empty(self):
1143
chk_bytes = self.get_chk_bytes()
1144
root_key = CHKMap.from_dict(chk_bytes, {})
1145
chkmap = CHKMap(chk_bytes, root_key)
1146
self.assertEqual([], list(chkmap.iteritems()))
1148
def test_iteritems_two_items(self):
1149
chk_bytes = self.get_chk_bytes()
1150
root_key = CHKMap.from_dict(chk_bytes,
1151
{(b"a", ): b"content here", (b"b", ): b"more content"})
1152
chkmap = CHKMap(chk_bytes, root_key)
1153
self.assertEqual([((b"a",), b"content here"), ((b"b",), b"more content")],
1154
sorted(list(chkmap.iteritems())))
1156
def test_iteritems_selected_one_of_two_items(self):
1157
chkmap = self._get_map(
1158
{(b"a",): b"content here", (b"b",): b"more content"})
1159
self.assertEqual({(b"a",): b"content here"},
1160
self.to_dict(chkmap, [(b"a",)]))
1162
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1163
chkmap = self._get_map(
1164
{(b"a", b"a"): b"content here", (b"a", b"b",): b"more content",
1165
(b"b", b""): b'boring content'},
1166
maximum_size=10, key_width=2)
1168
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1169
self.to_dict(chkmap, [(b"a",)]))
1171
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1172
search_key_func = chk_map.search_key_registry.get(b'hash-16-way')
1173
self.assertEqual(b'E8B7BE43\x00E8B7BE43',
1174
search_key_func(StaticTuple(b'a', b'a')))
1175
self.assertEqual(b'E8B7BE43\x0071BEEFF9',
1176
search_key_func(StaticTuple(b'a', b'b')))
1177
self.assertEqual(b'71BEEFF9\x0000000000',
1178
search_key_func(StaticTuple(b'b', b'')))
1179
chkmap = self._get_map(
1180
{(b"a", b"a"): b"content here", (b"a", b"b",): b"more content",
1181
(b"b", b""): b'boring content'},
1182
maximum_size=10, key_width=2, search_key_func=search_key_func)
1184
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1185
self.to_dict(chkmap, [(b"a",)]))
1187
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1188
chkmap = self._get_map(
1189
{(b"a", b"a"): b"content here", (b"a", b"b",): b"more content",
1190
(b"b", b""): b'boring content'}, key_width=2)
1192
{(b"a", b"a"): b"content here", (b"a", b"b"): b'more content'},
1193
self.to_dict(chkmap, [(b"a",)]))
1195
def test___len__empty(self):
1196
chkmap = self._get_map({})
1197
self.assertEqual(0, len(chkmap))
1199
def test___len__2(self):
1200
chkmap = self._get_map({(b"foo",): b"bar", (b"gam",): b"quux"})
1201
self.assertEqual(2, len(chkmap))
1203
def test_max_size_100_bytes_new(self):
1204
# When there is a 100 byte upper node limit, a tree is formed.
1205
chkmap = self._get_map(
1206
{(b"k1" * 50,): b"v1", (b"k2" * 50,): b"v2"}, maximum_size=100)
1207
# We expect three nodes:
1208
# A root, with two children, and with two key prefixes - k1 to one, and
1209
# k2 to the other as our node splitting is only just being developed.
1210
# The maximum size should be embedded
1211
chkmap._ensure_root()
1212
self.assertEqual(100, chkmap._root_node.maximum_size)
1213
self.assertEqual(1, chkmap._root_node._key_width)
1214
# There should be two child nodes, and prefix of 2(bytes):
1215
self.assertEqual(2, len(chkmap._root_node._items))
1216
self.assertEqual(b"k", chkmap._root_node._compute_search_prefix())
1217
# The actual nodes pointed at will change as serialisers change; so
1218
# here we test that the key prefix is correct; then load the nodes and
1219
# check they have the right pointed at key; whether they have the
1220
# pointed at value inline or not is also unrelated to this test so we
1221
# don't check that in detail - rather we just check the aggregate
1223
nodes = sorted(chkmap._root_node._items.items())
1226
self.assertEqual(b'k1', ptr1[0])
1227
self.assertEqual(b'k2', ptr2[0])
1228
node1 = chk_map._deserialise(
1229
chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1230
self.assertIsInstance(node1, LeafNode)
1231
self.assertEqual(1, len(node1))
1232
self.assertEqual({(b'k1' * 50,): b'v1'},
1233
self.to_dict(node1, chkmap._store))
1234
node2 = chk_map._deserialise(
1235
chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1236
self.assertIsInstance(node2, LeafNode)
1237
self.assertEqual(1, len(node2))
1238
self.assertEqual({(b'k2' * 50,): b'v2'},
1239
self.to_dict(node2, chkmap._store))
1240
# Having checked we have a good structure, check that the content is
1242
self.assertEqual(2, len(chkmap))
1243
self.assertEqual({(b"k1" * 50,): b"v1", (b"k2" * 50,): b"v2"},
1244
self.to_dict(chkmap))
1246
def test_init_root_is_LeafNode_new(self):
1247
chk_bytes = self.get_chk_bytes()
1248
chkmap = CHKMap(chk_bytes, None)
1249
self.assertIsInstance(chkmap._root_node, LeafNode)
1250
self.assertEqual({}, self.to_dict(chkmap))
1251
self.assertEqual(0, len(chkmap))
1253
def test_init_and_save_new(self):
1254
chk_bytes = self.get_chk_bytes()
1255
chkmap = CHKMap(chk_bytes, None)
1256
key = chkmap._save()
1257
leaf_node = LeafNode()
1258
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1260
def test_map_first_item_new(self):
1261
chk_bytes = self.get_chk_bytes()
1262
chkmap = CHKMap(chk_bytes, None)
1263
chkmap.map((b"foo,",), b"bar")
1264
self.assertEqual({(b'foo,',): b'bar'}, self.to_dict(chkmap))
1265
self.assertEqual(1, len(chkmap))
1266
key = chkmap._save()
1267
leaf_node = LeafNode()
1268
leaf_node.map(chk_bytes, (b"foo,",), b"bar")
1269
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1271
def test_unmap_last_item_root_is_leaf_new(self):
1272
chkmap = self._get_map({(b"k1" * 50,): b"v1", (b"k2" * 50,): b"v2"})
1273
chkmap.unmap((b"k1" * 50,))
1274
chkmap.unmap((b"k2" * 50,))
1275
self.assertEqual(0, len(chkmap))
1276
self.assertEqual({}, self.to_dict(chkmap))
1277
key = chkmap._save()
1278
leaf_node = LeafNode()
1279
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1281
def test__dump_tree(self):
1282
chkmap = self._get_map({(b"aaa",): b"value1", (b"aab",): b"value2",
1283
(b"bbb",): b"value3", },
1285
self.assertEqualDiff("'' InternalNode\n"
1286
" 'a' InternalNode\n"
1288
" ('aaa',) 'value1'\n"
1290
" ('aab',) 'value2'\n"
1292
" ('bbb',) 'value3'\n",
1293
chkmap._dump_tree())
1294
self.assertEqualDiff("'' InternalNode\n"
1295
" 'a' InternalNode\n"
1297
" ('aaa',) 'value1'\n"
1299
" ('aab',) 'value2'\n"
1301
" ('bbb',) 'value3'\n",
1302
chkmap._dump_tree())
1303
self.assertEqualDiff(
1304
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1305
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1306
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1307
" ('aaa',) 'value1'\n"
1308
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1309
" ('aab',) 'value2'\n"
1310
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1311
" ('bbb',) 'value3'\n",
1312
chkmap._dump_tree(include_keys=True))
1314
def test__dump_tree_in_progress(self):
1315
chkmap = self._get_map({(b"aaa",): b"value1", (b"aab",): b"value2"},
1317
chkmap.map((b'bbb',), b'value3')
1318
self.assertEqualDiff("'' InternalNode\n"
1319
" 'a' InternalNode\n"
1321
" ('aaa',) 'value1'\n"
1323
" ('aab',) 'value2'\n"
1325
" ('bbb',) 'value3'\n",
1326
chkmap._dump_tree())
1327
# For things that are updated by adding 'bbb', we don't have a sha key
1328
# for them yet, so they are listed as None
1329
self.assertEqualDiff(
1330
"'' InternalNode None\n"
1331
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1332
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1333
" ('aaa',) 'value1'\n"
1334
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1335
" ('aab',) 'value2'\n"
1336
" 'b' LeafNode None\n"
1337
" ('bbb',) 'value3'\n",
1338
chkmap._dump_tree(include_keys=True))
1341
def _search_key_single(key):
1342
"""A search key function that maps all nodes to the same value"""
1346
def _test_search_key(key):
1347
return b'test:' + b'\x00'.join(key)
1350
class TestMapSearchKeys(TestCaseWithStore):
1352
def test_default_chk_map_uses_flat_search_key(self):
1353
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1354
self.assertEqual(b'1',
1355
chkmap._search_key_func((b'1',)))
1356
self.assertEqual(b'1\x002',
1357
chkmap._search_key_func((b'1', b'2')))
1358
self.assertEqual(b'1\x002\x003',
1359
chkmap._search_key_func((b'1', b'2', b'3')))
1361
def test_search_key_is_passed_to_root_node(self):
1362
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1363
search_key_func=_test_search_key)
1364
self.assertIs(_test_search_key, chkmap._search_key_func)
1365
self.assertEqual(b'test:1\x002\x003',
1366
chkmap._search_key_func((b'1', b'2', b'3')))
1367
self.assertEqual(b'test:1\x002\x003',
1368
chkmap._root_node._search_key((b'1', b'2', b'3')))
1370
def test_search_key_passed_via__ensure_root(self):
1371
chk_bytes = self.get_chk_bytes()
1372
chkmap = chk_map.CHKMap(chk_bytes, None,
1373
search_key_func=_test_search_key)
1374
root_key = chkmap._save()
1375
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1376
search_key_func=_test_search_key)
1377
chkmap._ensure_root()
1378
self.assertEqual(b'test:1\x002\x003',
1379
chkmap._root_node._search_key((b'1', b'2', b'3')))
1381
def test_search_key_with_internal_node(self):
1382
chk_bytes = self.get_chk_bytes()
1383
chkmap = chk_map.CHKMap(chk_bytes, None,
1384
search_key_func=_test_search_key)
1385
chkmap._root_node.set_maximum_size(10)
1386
chkmap.map((b'1',), b'foo')
1387
chkmap.map((b'2',), b'bar')
1388
chkmap.map((b'3',), b'baz')
1389
self.assertEqualDiff("'' InternalNode\n"
1390
" 'test:1' LeafNode\n"
1392
" 'test:2' LeafNode\n"
1394
" 'test:3' LeafNode\n"
1395
" ('3',) 'baz'\n", chkmap._dump_tree())
1396
root_key = chkmap._save()
1397
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1398
search_key_func=_test_search_key)
1399
self.assertEqualDiff("'' InternalNode\n"
1400
" 'test:1' LeafNode\n"
1402
" 'test:2' LeafNode\n"
1404
" 'test:3' LeafNode\n"
1405
" ('3',) 'baz'\n", chkmap._dump_tree())
1407
def test_search_key_16(self):
1408
chk_bytes = self.get_chk_bytes()
1409
chkmap = chk_map.CHKMap(chk_bytes, None,
1410
search_key_func=chk_map._search_key_16)
1411
chkmap._root_node.set_maximum_size(10)
1412
chkmap.map((b'1',), b'foo')
1413
chkmap.map((b'2',), b'bar')
1414
chkmap.map((b'3',), b'baz')
1415
self.assertEqualDiff("'' InternalNode\n"
1421
" ('1',) 'foo'\n", chkmap._dump_tree())
1422
root_key = chkmap._save()
1423
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1424
search_key_func=chk_map._search_key_16)
1425
# We can get the values back correctly
1426
self.assertEqual([((b'1',), b'foo')],
1427
list(chkmap.iteritems([(b'1',)])))
1428
self.assertEqualDiff("'' InternalNode\n"
1434
" ('1',) 'foo'\n", chkmap._dump_tree())
1436
def test_search_key_255(self):
1437
chk_bytes = self.get_chk_bytes()
1438
chkmap = chk_map.CHKMap(chk_bytes, None,
1439
search_key_func=chk_map._search_key_255)
1440
chkmap._root_node.set_maximum_size(10)
1441
chkmap.map((b'1',), b'foo')
1442
chkmap.map((b'2',), b'bar')
1443
chkmap.map((b'3',), b'baz')
1444
self.assertEqualDiff("'' InternalNode\n"
1445
" '\\x1a' LeafNode\n"
1449
" '\\x83' LeafNode\n"
1450
" ('1',) 'foo'\n", chkmap._dump_tree(encoding='latin1'))
1451
root_key = chkmap._save()
1452
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1453
search_key_func=chk_map._search_key_255)
1454
# We can get the values back correctly
1455
self.assertEqual([((b'1',), b'foo')],
1456
list(chkmap.iteritems([(b'1',)])))
1457
self.assertEqualDiff("'' InternalNode\n"
1458
" '\\x1a' LeafNode\n"
1462
" '\\x83' LeafNode\n"
1463
" ('1',) 'foo'\n", chkmap._dump_tree(encoding='latin1'))
1465
def test_search_key_collisions(self):
1466
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1467
search_key_func=_search_key_single)
1468
# The node will want to expand, but it cannot, because it knows that
1469
# all the keys must map to this node
1470
chkmap._root_node.set_maximum_size(20)
1471
chkmap.map((b'1',), b'foo')
1472
chkmap.map((b'2',), b'bar')
1473
chkmap.map((b'3',), b'baz')
1474
self.assertEqualDiff("'' LeafNode\n"
1477
" ('3',) 'baz'\n", chkmap._dump_tree())
1480
class TestLeafNode(TestCaseWithStore):
1482
def test_current_size_empty(self):
1484
self.assertEqual(16, node._current_size())
1486
def test_current_size_size_changed(self):
1488
node.set_maximum_size(10)
1489
self.assertEqual(17, node._current_size())
1491
def test_current_size_width_changed(self):
1493
node._key_width = 10
1494
self.assertEqual(17, node._current_size())
1496
def test_current_size_items(self):
1498
base_size = node._current_size()
1499
node.map(None, (b"foo bar",), b"baz")
1500
self.assertEqual(base_size + 14, node._current_size())
1502
def test_deserialise_empty(self):
1503
node = LeafNode.deserialise(b"chkleaf:\n10\n1\n0\n\n", (b"sha1:1234",))
1504
self.assertEqual(0, len(node))
1505
self.assertEqual(10, node.maximum_size)
1506
self.assertEqual((b"sha1:1234",), node.key())
1507
self.assertIs(None, node._search_prefix)
1508
self.assertIs(None, node._common_serialised_prefix)
1510
def test_deserialise_items(self):
1511
node = LeafNode.deserialise(
1512
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1514
self.assertEqual(2, len(node))
1515
self.assertEqual([((b"foo bar",), b"baz"), ((b"quux",), b"blarh")],
1516
sorted(node.iteritems(None)))
1518
def test_deserialise_item_with_null_width_1(self):
1519
node = LeafNode.deserialise(
1520
b"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1522
self.assertEqual(2, len(node))
1523
self.assertEqual([((b"foo",), b"bar\x00baz"), ((b"quux",), b"blarh")],
1524
sorted(node.iteritems(None)))
1526
def test_deserialise_item_with_null_width_2(self):
1527
node = LeafNode.deserialise(
1528
b"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1529
b"quux\x00\x001\nblarh\n",
1531
self.assertEqual(2, len(node))
1532
self.assertEqual([((b"foo", b"1"), b"bar\x00baz"), ((b"quux", b""), b"blarh")],
1533
sorted(node.iteritems(None)))
1535
def test_iteritems_selected_one_of_two_items(self):
1536
node = LeafNode.deserialise(
1537
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1539
self.assertEqual(2, len(node))
1540
self.assertEqual([((b"quux",), b"blarh")],
1541
sorted(node.iteritems(None, [(b"quux",), (b"qaz",)])))
1543
def test_deserialise_item_with_common_prefix(self):
1544
node = LeafNode.deserialise(
1545
b"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1547
self.assertEqual(2, len(node))
1548
self.assertEqual([((b"foo", b"1"), b"bar\x00baz"), ((b"foo", b"2"), b"blarh")],
1549
sorted(node.iteritems(None)))
1550
self.assertIs(chk_map._unknown, node._search_prefix)
1551
self.assertEqual(b'foo\x00', node._common_serialised_prefix)
1553
def test_deserialise_multi_line(self):
1554
node = LeafNode.deserialise(
1555
b"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1557
self.assertEqual(2, len(node))
1558
self.assertEqual([((b"foo", b"1"), b"bar\nbaz"),
1559
((b"foo", b"2"), b"blarh\n"),
1560
], sorted(node.iteritems(None)))
1561
self.assertIs(chk_map._unknown, node._search_prefix)
1562
self.assertEqual(b'foo\x00', node._common_serialised_prefix)
1564
def test_key_new(self):
1566
self.assertEqual(None, node.key())
1568
def test_key_after_map(self):
1569
node = LeafNode.deserialise(b"chkleaf:\n10\n1\n0\n\n", (b"sha1:1234",))
1570
node.map(None, (b"foo bar",), b"baz quux")
1571
self.assertEqual(None, node.key())
1573
def test_key_after_unmap(self):
1574
node = LeafNode.deserialise(
1575
b"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1577
node.unmap(None, (b"foo bar",))
1578
self.assertEqual(None, node.key())
1580
def test_map_exceeding_max_size_only_entry_new(self):
1582
node.set_maximum_size(10)
1583
result = node.map(None, (b"foo bar",), b"baz quux")
1584
self.assertEqual((b"foo bar", [(b"", node)]), result)
1585
self.assertTrue(10 < node._current_size())
1587
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
1589
node.set_maximum_size(10)
1590
node.map(None, (b"foo bar",), b"baz quux")
1591
prefix, result = list(node.map(None, (b"blue",), b"red"))
1592
self.assertEqual(b"", prefix)
1593
self.assertEqual(2, len(result))
1594
split_chars = {result[0][0], result[1][0]}
1595
self.assertEqual({b"f", b"b"}, split_chars)
1596
nodes = dict(result)
1598
self.assertEqual({(b"foo bar",): b"baz quux"},
1599
self.to_dict(node, None))
1600
self.assertEqual(10, node.maximum_size)
1601
self.assertEqual(1, node._key_width)
1603
self.assertEqual({(b"blue",): b"red"}, self.to_dict(node, None))
1604
self.assertEqual(10, node.maximum_size)
1605
self.assertEqual(1, node._key_width)
1607
def test_map_first(self):
1609
result = node.map(None, (b"foo bar",), b"baz quux")
1610
self.assertEqual((b"foo bar", [(b"", node)]), result)
1611
self.assertEqual({(b"foo bar",): b"baz quux"},
1612
self.to_dict(node, None))
1613
self.assertEqual(1, len(node))
1615
def test_map_second(self):
1617
node.map(None, (b"foo bar",), b"baz quux")
1618
result = node.map(None, (b"bingo",), b"bango")
1619
self.assertEqual((b"", [(b"", node)]), result)
1620
self.assertEqual({(b"foo bar",): b"baz quux", (b"bingo",): b"bango"},
1621
self.to_dict(node, None))
1622
self.assertEqual(2, len(node))
1624
def test_map_replacement(self):
1626
node.map(None, (b"foo bar",), b"baz quux")
1627
result = node.map(None, (b"foo bar",), b"bango")
1628
self.assertEqual((b"foo bar", [(b"", node)]), result)
1629
self.assertEqual({(b"foo bar",): b"bango"},
1630
self.to_dict(node, None))
1631
self.assertEqual(1, len(node))
1633
def test_serialise_empty(self):
1634
store = self.get_chk_bytes()
1636
node.set_maximum_size(10)
1637
expected_key = (b"sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1638
self.assertEqual([expected_key],
1639
list(node.serialise(store)))
1640
self.assertEqual(b"chkleaf:\n10\n1\n0\n\n",
1641
self.read_bytes(store, expected_key))
1642
self.assertEqual(expected_key, node.key())
1644
def test_serialise_items(self):
1645
store = self.get_chk_bytes()
1647
node.set_maximum_size(10)
1648
node.map(None, (b"foo bar",), b"baz quux")
1649
expected_key = (b"sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1650
self.assertEqual(b'foo bar', node._common_serialised_prefix)
1651
self.assertEqual([expected_key],
1652
list(node.serialise(store)))
1653
self.assertEqual(b"chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1654
self.read_bytes(store, expected_key))
1655
self.assertEqual(expected_key, node.key())
1657
def test_unique_serialised_prefix_empty_new(self):
1659
self.assertIs(None, node._compute_search_prefix())
1661
def test_unique_serialised_prefix_one_item_new(self):
1663
node.map(None, (b"foo bar", b"baz"), b"baz quux")
1664
self.assertEqual(b"foo bar\x00baz", node._compute_search_prefix())
1666
def test_unmap_missing(self):
1668
self.assertRaises(KeyError, node.unmap, None, (b"foo bar",))
1670
def test_unmap_present(self):
1672
node.map(None, (b"foo bar",), b"baz quux")
1673
result = node.unmap(None, (b"foo bar",))
1674
self.assertEqual(node, result)
1675
self.assertEqual({}, self.to_dict(node, None))
1676
self.assertEqual(0, len(node))
1678
    def test_map_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
        node.map(None, (b"foo bar", b"baz"), b"baz quux")
        self.assertEqual(b'foo bar\x00baz', node._search_prefix)
        self.assertEqual(b'foo bar\x00baz', node._common_serialised_prefix)
        node.map(None, (b"foo bar", b"bing"), b"baz quux")
        self.assertEqual(b'foo bar\x00b', node._search_prefix)
        self.assertEqual(b'foo bar\x00b', node._common_serialised_prefix)
        node.map(None, (b"fool", b"baby"), b"baz quux")
        self.assertEqual(b'foo', node._search_prefix)
        self.assertEqual(b'foo', node._common_serialised_prefix)
        node.map(None, (b"foo bar", b"baz"), b"replaced")
        self.assertEqual(b'foo', node._search_prefix)
        self.assertEqual(b'foo', node._common_serialised_prefix)
        node.map(None, (b"very", b"different"), b"value")
        self.assertEqual(b'', node._search_prefix)
        self.assertEqual(b'', node._common_serialised_prefix)

    def test_unmap_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
        node.map(None, (b"foo bar", b"baz"), b"baz quux")
        node.map(None, (b"foo bar", b"bing"), b"baz quux")
        node.map(None, (b"fool", b"baby"), b"baz quux")
        node.map(None, (b"very", b"different"), b"value")
        self.assertEqual(b'', node._search_prefix)
        self.assertEqual(b'', node._common_serialised_prefix)
        node.unmap(None, (b"very", b"different"))
        self.assertEqual(b"foo", node._search_prefix)
        self.assertEqual(b"foo", node._common_serialised_prefix)
        node.unmap(None, (b"fool", b"baby"))
        self.assertEqual(b'foo bar\x00b', node._search_prefix)
        self.assertEqual(b'foo bar\x00b', node._common_serialised_prefix)
        node.unmap(None, (b"foo bar", b"baz"))
        self.assertEqual(b'foo bar\x00bing', node._search_prefix)
        self.assertEqual(b'foo bar\x00bing', node._common_serialised_prefix)
        node.unmap(None, (b"foo bar", b"bing"))
        self.assertEqual(None, node._search_prefix)
        self.assertEqual(None, node._common_serialised_prefix)
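

# An illustrative sketch (not part of the original test suite) of the
# invariant the two tests above exercise: a leaf's common serialised prefix is
# simply the longest prefix shared by all of its serialised keys, so it can
# only shrink as more divergent keys are mapped in, and grows back as they are
# unmapped.  The helper below is hypothetical and works on plain byte strings.
def _sketch_common_prefix(serialised_keys):
    """Return the longest common prefix of the given byte strings."""
    if not serialised_keys:
        # mirrors the None prefix of an empty leaf
        return None
    common = serialised_keys[0]
    for key in serialised_keys[1:]:
        # trim the candidate prefix until the next key shares it
        while not key.startswith(common):
            common = common[:-1]
    return common
# For example, _sketch_common_prefix([b'foo bar\x00baz', b'foo bar\x00bing'])
# is b'foo bar\x00b', matching the assertions above.

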
class TestInternalNode(TestCaseWithStore):

    def test_add_node_empty_new(self):
        node = InternalNode(b'fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"foo", child)
        # Note that node isn't strictly valid now as a tree (only one child),
        # but that's ok for this test.
        # The first child defines the node's width:
        self.assertEqual(3, node._node_width)
        # We should be able to iterate over the contents without doing IO.
        self.assertEqual({(b'foo',): b'bar'}, self.to_dict(node, None))
        # The length should be known:
        self.assertEqual(1, len(node))
        # serialising the node should serialise the child and the node.
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        child_key = child.serialise(chk_bytes)[0]
        self.assertEqual(
            [child_key, (b'sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
            keys)
        # We should be able to access deserialised content.
        bytes = self.read_bytes(chk_bytes, keys[1])
        node = chk_map._deserialise(bytes, keys[1], None)
        self.assertEqual(1, len(node))
        self.assertEqual({(b'foo',): b'bar'}, self.to_dict(node, chk_bytes))
        self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode(b'fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node(b"fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):
    def test__iter_nodes_no_key_filter(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"baz")
        node.add_node(b"b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"bar")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"baz")
        node.add_node(b"b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = ((b'foo',), (b'bar',), (b'cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # filtered down to exactly that one.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode(b'')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"val")
        child.map(None, (b"fob",), b"val")
        node.add_node(b"f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"bar",), b"val")
        child.map(None, (b"baz",), b"val")
        node.add_node(b"b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # ignored.
        key_filter = ((b'foo',), (b'fob',), (b'bar',), (b'baz',), (b'ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both retained.
            self.assertEqual(2, len(node_key_filter))
    def make_fo_fa_node(self):
        node = InternalNode(b'f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"foo",), b"val")
        child.map(None, (b"fob",), b"val")
        node.add_node(b'fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, (b"far",), b"val")
        child.map(None, (b"faz",), b"val")
        node.add_node(b"fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo', b'bar'), (b'foo',), (b'fo',), (b'b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove((b'b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [(b'foo', b'bar'), (b'foo',), (b'fo',), (b'f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [(b'foo',), (b'faa',), (b'baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))
    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, (b'foo bar',), b'quux')
        leaf2 = LeafNode()
        leaf2.map(None, (b'strange',), b'beast')
        node.add_node(b"f", leaf1)
        node.add_node(b"s", leaf2)
        self.assertEqual([((b'foo bar',), b'quux'), ((b'strange',), b'beast')],
                         sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, (b'foo bar',), b'quux')
        leaf2 = LeafNode()
        leaf2.map(None, (b'strange',), b'beast')
        node.add_node(b"f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items[b'f'] = None
        node.add_node(b"s", leaf2)
        self.assertEqual([((b'strange',), b'beast')],
                         sorted(node.iteritems(None, [(b'strange',), (b'weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get(b'hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, StaticTuple(b'foo bar',), b'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, StaticTuple(b'strange',), b'beast')
        self.assertEqual(b'\xbeF\x014', search_key_func(
            StaticTuple(b'foo bar',)))
        self.assertEqual(b'\x85\xfa\xf7K', search_key_func(
            StaticTuple(b'strange',)))
        node.add_node(b"\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items[b'\xbe'] = None
        node.add_node(b"\x85", leaf2)
        self.assertEqual([((b'strange',), b'beast')],
                         sorted(node.iteritems(None, [StaticTuple(b'strange',),
                                                      StaticTuple(b'weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([(b'missing',)])))
    def test_map_to_new_child_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k2',): b'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
                         len([value for value in node._items.values()
                              if isinstance(value, StaticTuple)]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, (b'k3',), b'quux')
        self.assertEqual(b"k", prefix)
        self.assertEqual([(b"", node)], nodes)
        # check new child details
        child = node._items[b'k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({(b'k3',): b'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k2',): b'bar', (b'k3',): b'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node that points at it.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)
    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k22',): b'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k22',) 'bar'\n", chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
                         len([value for value in node._items.values()
                              if isinstance(value, StaticTuple)]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, (b'k23',), b'quux')
        self.assertEqual(b"k", prefix)
        self.assertEqual([(b"", node)], nodes)
        # check new child details
        child = node._items[b'k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({(b'k22',): b'bar', (b'k23',): b'quux'},
                         self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting with only one
        # key leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items[b'k22']._key
        k23_key = child._items[b'k23']._key
        self.assertEqual({k22_key, k23_key, child_key, node.key()}, set(keys))
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n", chkmap._dump_tree())
    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get(b'hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual(b'E8B7BE43\x0071BEEFF9', search_key_func(
            StaticTuple(b'a', b'b')))
        self.assertEqual(b'E8B7', node._search_prefix_filter(
            StaticTuple(b'a', b'b')))
        self.assertEqual(b'E8B7', node._search_prefix_filter(
            StaticTuple(b'a',)))
    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n", chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, (b'k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items[b'k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({(b'k22',): b'bar'},
                         self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({(b'k1',): b'foo', (b'k22',): b'bar'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k22',) 'bar'\n", chkmap._dump_tree())
    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {(b'k1',): b'foo', (b'k22',): b'bar', (b'k23',): b'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n", chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items[b'k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, (b'k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap((b'k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k23',) 'quux'\n", chkmap._dump_tree())

# map -> fits - done
# map -> doesn't fit - shrink from left till fits
# key data to return: the common prefix, new nodes.

# unmap -> how to tell if siblings can be combined.
# combining leaf nodes means expanding the prefix to the left; so gather the size of
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
# is an internal node, we know that that is a dense subtree - can't combine.
# otherwise as soon as the sum of serialised values exceeds the split threshold
# we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count

# variable length prefixes? -> later start with fixed width to get something going
# map -> fits - update pointer to leaf
# return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
# create internal nodes for all the partitions, return list of unique
# prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count

# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
# code)
# map inits as empty leafnode.

# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA
# AB - split, but we want to end up with AB, BA, in one node, with
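
# A minimal illustrative sketch (not breezy code) of the splitting step the
# notes above describe: when a node no longer fits, partition its items by one
# more byte of search-key prefix, giving one bucket per unique extended
# prefix.  The helper name and shape are hypothetical.
def _sketch_split_by_prefix(items, common_prefix_len):
    """Group serialised keys by one extra byte beyond the common prefix.

    :param items: dict mapping serialised key bytes to value bytes.
    :param common_prefix_len: length of the prefix shared by every key.
    :return: dict mapping each extended prefix to its sub-dict of items.
    """
    buckets = {}
    for key, value in items.items():
        # one byte further right than the shared prefix decides the bucket
        prefix = key[:common_prefix_len + 1]
        buckets.setdefault(prefix, {})[key] = value
    return buckets

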
class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
                                        new_roots, old_roots, search_key_func)
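
    # Illustrative sketch (not part of the original suite) of the pattern the
    # tests below follow when driving CHKMapDifference by hand.  Everything
    # touched here is private API, used purely so the queues can be inspected;
    # the helper name is hypothetical and it is never called by the tests.
    def _example_walk(self, new_roots, old_roots):
        diff = self.get_difference(new_roots, old_roots)
        # _read_all_roots() yields the root records unique to the new side
        # and primes the internal queues.
        root_keys = [record.key for record in diff._read_all_roots()]
        # Pages still to walk on each side, plus leaf items already known to
        # be new.
        return (root_keys, diff._new_queue, diff._old_queue,
                diff._new_item_queue)
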
    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map((b'aaa',), b'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual({key1}, diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map((b'aaa',), b'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # set.
        self.assertEqual([((b'aaa',), b'new aaa content')],
                         diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass
        self.assertEqual([], root_results)
    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree()  # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'abb',), b'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides, both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree()  # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        key1_c = c_map._root_node._items[b'c'].key()
        c_map.map((b'abb',), b'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        key2_c = c_map._root_node._items[b'c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map((b'ccc',), b'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        key3_c = c_map._root_node._items[b'c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or
        # key3_a
        self.assertEqual({key2_a, key3_c}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual({key1_a, key1_c}, set(diff._old_queue))
    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree()  # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items[b'a'].key()
        key1_c = c_map._root_node._items[b'c'].key()
        key1_d = c_map._root_node._items[b'd'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree()  # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items[b'aa'].key()
        key2_ad = c_map2._root_node._items[b'ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present in the new map.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual({key2_aa, key2_ad}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual({key2_aa, key2_ad}, set(diff._old_queue))
        self.assertEqual({key1_a, key1_c, key1_d}, set(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree()  # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items[b'2'].key()
        key1_4 = c_map._root_node._items[b'4'].key()
        key1_C = c_map._root_node._items[b'C'].key()
        key1_F = c_map._root_node._items[b'F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree()  # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items[b'F0'].key()
        key2_F3 = c_map2._root_node._items[b'F3'].key()
        key2_F4 = c_map2._root_node._items[b'F4'].key()
        key2_FD = c_map2._root_node._items[b'FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)
    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree()  # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items[b'aa'].key()
        key1_ad = c_map._root_node._items[b'ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree()  # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items[b'a'].key()
        key2_b = c_map2._root_node._items[b'b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        # use. The reason is that 'a' was matched exactly to 'ad' (by sha
        # sum). However, the code gets complicated in the case of more
        # than one interesting key, so for now, we live with this.
        # Consider revising, though benchmarking showing it to be a
        # real-world issue should be done.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree()  # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items[b'a'].key()
        c_map2 = self.get_map({
            (b'acc',): b'initial acc content',
            (b'ace',): b'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual({((b'acc',), b'initial acc content'),
                          ((b'ace',), b'initial ace content'),
                          }, set(diff._new_item_queue))
    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree()  # load everything
        key2_c = c_map._root_node._items[b'c'].key()
        key2_d = c_map._root_node._items[b'd'].key()
        c_map.map((b'ccc',), b'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items[b'c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue 1 time
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)
    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into interesting queue, and
        # then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)
    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree()  # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'ccc',), b'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        c_map.map((b'add',), b'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree()  # load everything
        key1_a = c_map._root_node._items[b'a'].key()
        key1_aa = c_map._root_node._items[b'a']._items[b'aa'].key()
        key1_ab = c_map._root_node._items[b'a']._items[b'ab'].key()
        key1_ac = c_map._root_node._items[b'a']._items[b'ac'].key()
        key1_ad = c_map._root_node._items[b'a']._items[b'ad'].key()
        c_map.map((b'aaa',), b'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items[b'a'].key()
        key2_aa = c_map._root_node._items[b'a']._items[b'aa'].key()
        c_map.map((b'acc',), b'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items[b'a'].key()
        key3_ac = c_map._root_node._items[b'a']._items[b'ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))

class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))
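
    # Illustrative sketch (not part of the original suite) of what
    # assertIterInteresting drives: iter_interesting_nodes walks everything
    # reachable from the interesting root keys that is not reachable from the
    # old root keys, yielding (record, items) pairs.  The helper name is
    # hypothetical and is never called by the tests.
    def _example_iteration(self, store, interesting_keys, old_keys):
        new_pages = []
        new_values = []
        for record, items in chk_map.iter_interesting_nodes(
                store, interesting_keys, old_keys):
            if record is not None:
                # a CHK page that only the interesting maps reference
                new_pages.append(record.key)
            # (key, value) leaf content not present in any of the old maps
            new_values.extend(items)
        return new_pages, new_values
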
    def test_empty_to_one_keys(self):
        target = self.get_map_key({(b'a',): b'content'})
        self.assertIterInteresting([target],
                                   [((b'a',), b'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({(b'a',): b'content'})
        self.assertIterInteresting([target],
                                   [((b'a',), b'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({(b'a',): b'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({(b'a',): b'content',
                                  (b'b',): b'content',
                                  (b'c',): b'content',
                                  })
        target = self.get_map_key({(b'a',): b'content',
                                   (b'b',): b'other content',
                                   (b'c',): b'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items[b'b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [((b'b',), b'other content')],
                                   [target], [basis])
    def test_common_sub_page(self):
        basis = self.get_map_key({(b'aaa',): b'common',
                                  (b'c',): b'common',
                                  })
        target = self.get_map_key({(b'aaa',): b'common',
                                   (b'aab',): b'new',
                                   (b'c',): b'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items[b'a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items[b'a']._items[b'aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [((b'aab',), b'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({(b'aaa',): b'common'})
        target2 = self.get_map_key({(b'aaa',): b'common',
                                    (b'bbb',): b'new',
                                    })
        target3 = self.get_map_key({(b'aaa',): b'common',
                                    (b'aac',): b'other',
                                    (b'bbb',): b'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items[b'b'].key()
        a_key = target3_map._root_node._items[b'a'].key()
        aac_key = target3_map._root_node._items[b'a']._items[b'aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [((b'aaa',), b'common'), ((b'bbb',), b'new'), ((b'aac',), b'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [((b'bbb',), b'new'), ((b'aac',), b'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])
    def test_multiple_maps(self):
        basis1 = self.get_map_key({(b'aaa',): b'common',
                                   (b'aab',): b'basis1',
                                   })
        basis2 = self.get_map_key({(b'bbb',): b'common',
                                   (b'bbc',): b'basis2',
                                   })
        target1 = self.get_map_key({(b'aaa',): b'common',
                                    (b'aac',): b'target1',
                                    (b'bbb',): b'common',
                                    })
        target2 = self.get_map_key({(b'aaa',): b'common',
                                    (b'bba',): b'target2',
                                    (b'bbb',): b'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items[b'a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items[b'a']._items[b'aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items[b'b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items[b'b']._items[b'bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [((b'aac',), b'target1'), ((b'bba',), b'target2')],
            [target1, target2], [basis1, basis2])
    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration, that
        # it is still scanned for old refs and items, because it is
        # not truly new. This requires 2 levels of InternalNodes to expose,
        # because of the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefit from
        # being reworked.
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable inside
        #    that InternalNode
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            (b'aaa',): b'left',
            (b'abb',): b'right',
            # Forces an internalNode at 'a'
            (b'ccc',): b'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            (b'aaa',): b'left',
            (b'abb',): b'right',
            (b'ccc',): b'common',
            # And a new top level node so the root key is different
            (b'ddd',): b'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            (b'abb',): b'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items[b'd'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.

        self.assertIterInteresting(
            [right, left, l_d_key],
            [((b'ddd',), b'change')],
            [left, right], [basis])
    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node
        basis = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'will change left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'will change right',
        left = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'changed left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'will change right',
        right = self.get_map_key({
            (b'aaa',): b'unchanged',
            (b'abb',): b'will change left',
            (b'caa',): b'unchanged',
            (b'cbb',): b'changed right',
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items[b'a'].key()
        l_c_key = left_map._root_node._items[b'c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items[b'a'].key()
        r_c_key = right_map._root_node._items[b'c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [((b'abb',), b'changed left'), ((b'cbb',), b'changed right')],
            [left, right], [basis])