# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from itertools import izip

from bzrlib import (
    chk_map,
    groupcompress,
    osutils,
    tests,
    )
from bzrlib.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )


class TestNode(tests.TestCase):
37
def assertCommonPrefix(self, expected_common, prefix, key):
38
common = Node.common_prefix(prefix, key)
39
self.assertTrue(len(common) <= len(prefix))
40
self.assertTrue(len(common) <= len(key))
41
self.assertStartsWith(prefix, common)
42
self.assertStartsWith(key, common)
43
self.assertEquals(expected_common, common)
45
def test_common_prefix(self):
46
self.assertCommonPrefix('beg', 'beg', 'begin')
48
def test_no_common_prefix(self):
49
self.assertCommonPrefix('', 'begin', 'end')

    def test_equal(self):
        self.assertCommonPrefix('begin', 'begin', 'begin')
54
def test_not_a_prefix(self):
55
self.assertCommonPrefix('b', 'begin', 'b')

    def test_empty(self):
        self.assertCommonPrefix('', '', 'end')
59
self.assertCommonPrefix('', 'begin', '')
60
self.assertCommonPrefix('', '', '')
63
class TestCaseWithStore(tests.TestCaseWithMemoryTransport):

    def get_chk_bytes(self):
        # The easiest way to get a CHK store is a development6 repository and
        # then work with the chk_bytes attribute directly.
        factory = groupcompress.make_pack_factory(False, False, 1)
        self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
                 search_key_func=None):
        if chk_bytes is None:
            chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
                         " match CHKMap._create_via_map")
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap
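
    # A quick orientation sketch (a comment, not a test): this is the round
    # trip the helpers above rely on, using only calls already exercised
    # elsewhere in this file.
    #
    #   chk_bytes = self.get_chk_bytes()
    #   root_key = CHKMap.from_dict(chk_bytes, {('a',): 'b'})
    #   chkmap = CHKMap(chk_bytes, root_key)
    #   dict(chkmap.iteritems())    # => {('a',): 'b'}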
87
def read_bytes(self, chk_bytes, key):
88
stream = chk_bytes.get_record_stream([key], 'unordered', True)
89
record = stream.next()
90
if record.storage_kind == 'absent':
91
self.fail('Store does not contain the key %s' % (key,))
92
return record.get_bytes_as("fulltext")
94
def to_dict(self, node, *args):
95
return dict(node.iteritems(*args))
98
class TestCaseWithExampleMaps(TestCaseWithStore):
100
def get_chk_bytes(self):
101
if getattr(self, '_chk_bytes', None) is None:
102
self._chk_bytes = super(TestCaseWithExampleMaps,
103
self).get_chk_bytes()
104
return self._chk_bytes
106
    def get_map(self, a_dict, maximum_size=100, search_key_func=None):
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                              chk_bytes=self.get_chk_bytes(),
                              search_key_func=search_key_func)
        return c_map
112
def make_root_only_map(self, search_key_func=None):
113
return self.get_map({
114
('aaa',): 'initial aaa content',
115
('abb',): 'initial abb content',
116
}, search_key_func=search_key_func)
118
def make_root_only_aaa_ddd_map(self, search_key_func=None):
119
return self.get_map({
120
('aaa',): 'initial aaa content',
121
('ddd',): 'initial ddd content',
122
}, search_key_func=search_key_func)
124
def make_one_deep_map(self, search_key_func=None):
125
# Same as root_only_map, except it forces an InternalNode at the root
126
return self.get_map({
127
('aaa',): 'initial aaa content',
128
('abb',): 'initial abb content',
129
('ccc',): 'initial ccc content',
130
('ddd',): 'initial ddd content',
131
}, search_key_func=search_key_func)
133
def make_two_deep_map(self, search_key_func=None):
134
# Carefully chosen so that it creates a 2-deep map for both
135
# _search_key_plain and for _search_key_16
136
# Also so that things line up with make_one_deep_two_prefix_map
137
return self.get_map({
138
('aaa',): 'initial aaa content',
139
('abb',): 'initial abb content',
140
('acc',): 'initial acc content',
141
('ace',): 'initial ace content',
142
('add',): 'initial add content',
143
('adh',): 'initial adh content',
144
('adl',): 'initial adl content',
145
('ccc',): 'initial ccc content',
146
('ddd',): 'initial ddd content',
147
}, search_key_func=search_key_func)
149
def make_one_deep_two_prefix_map(self, search_key_func=None):
150
"""Create a map with one internal node, but references are extra long.
152
Otherwise has similar content to make_two_deep_map.
154
return self.get_map({
155
('aaa',): 'initial aaa content',
156
('add',): 'initial add content',
157
('adh',): 'initial adh content',
158
('adl',): 'initial adl content',
159
}, search_key_func=search_key_func)
161
def make_one_deep_one_prefix_map(self, search_key_func=None):
162
"""Create a map with one internal node, but references are extra long.
164
Similar to make_one_deep_two_prefix_map, except the split is at the
165
first char, rather than the second.
167
return self.get_map({
168
('add',): 'initial add content',
169
('adh',): 'initial adh content',
170
('adl',): 'initial adl content',
171
('bbb',): 'initial bbb content',
172
}, search_key_func=search_key_func)
175
class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
176
"""Actual tests for the provided examples."""
178
def test_root_only_map_plain(self):
179
c_map = self.make_root_only_map()
180
self.assertEqualDiff(
182
" ('aaa',) 'initial aaa content'\n"
183
" ('abb',) 'initial abb content'\n",
186
def test_root_only_map_16(self):
187
c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
188
self.assertEqualDiff(
190
" ('aaa',) 'initial aaa content'\n"
191
" ('abb',) 'initial abb content'\n",
194
def test_one_deep_map_plain(self):
195
c_map = self.make_one_deep_map()
196
self.assertEqualDiff(
199
" ('aaa',) 'initial aaa content'\n"
200
" ('abb',) 'initial abb content'\n"
202
" ('ccc',) 'initial ccc content'\n"
204
" ('ddd',) 'initial ddd content'\n",
207
def test_one_deep_map_16(self):
208
c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
209
self.assertEqualDiff(
212
" ('ccc',) 'initial ccc content'\n"
214
" ('abb',) 'initial abb content'\n"
216
" ('aaa',) 'initial aaa content'\n"
217
" ('ddd',) 'initial ddd content'\n",
220
def test_root_only_aaa_ddd_plain(self):
221
c_map = self.make_root_only_aaa_ddd_map()
222
self.assertEqualDiff(
224
" ('aaa',) 'initial aaa content'\n"
225
" ('ddd',) 'initial ddd content'\n",
228
    def test_root_only_aaa_ddd_16(self):
        c_map = self.make_root_only_aaa_ddd_map(
            search_key_func=chk_map._search_key_16)
        # We use 'aaa' and 'ddd' because they happen to map to 'F' when using
        # the _search_key_16 hash.
self.assertEqualDiff(
235
" ('aaa',) 'initial aaa content'\n"
236
" ('ddd',) 'initial ddd content'\n",
239
def test_two_deep_map_plain(self):
240
c_map = self.make_two_deep_map()
241
self.assertEqualDiff(
243
" 'a' InternalNode\n"
245
" ('aaa',) 'initial aaa content'\n"
247
" ('abb',) 'initial abb content'\n"
249
" ('acc',) 'initial acc content'\n"
250
" ('ace',) 'initial ace content'\n"
252
" ('add',) 'initial add content'\n"
253
" ('adh',) 'initial adh content'\n"
254
" ('adl',) 'initial adl content'\n"
256
" ('ccc',) 'initial ccc content'\n"
258
" ('ddd',) 'initial ddd content'\n",
261
def test_two_deep_map_16(self):
262
c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
263
self.assertEqualDiff(
266
" ('acc',) 'initial acc content'\n"
267
" ('ccc',) 'initial ccc content'\n"
269
" ('abb',) 'initial abb content'\n"
271
" ('ace',) 'initial ace content'\n"
272
" 'F' InternalNode\n"
274
" ('aaa',) 'initial aaa content'\n"
276
" ('adl',) 'initial adl content'\n"
278
" ('adh',) 'initial adh content'\n"
280
" ('ddd',) 'initial ddd content'\n"
282
" ('add',) 'initial add content'\n",
285
def test_one_deep_two_prefix_map_plain(self):
286
c_map = self.make_one_deep_two_prefix_map()
287
self.assertEqualDiff(
290
" ('aaa',) 'initial aaa content'\n"
292
" ('add',) 'initial add content'\n"
293
" ('adh',) 'initial adh content'\n"
294
" ('adl',) 'initial adl content'\n",
297
def test_one_deep_two_prefix_map_16(self):
298
c_map = self.make_one_deep_two_prefix_map(
299
search_key_func=chk_map._search_key_16)
300
self.assertEqualDiff(
303
" ('aaa',) 'initial aaa content'\n"
305
" ('adl',) 'initial adl content'\n"
307
" ('adh',) 'initial adh content'\n"
309
" ('add',) 'initial add content'\n",
312
def test_one_deep_one_prefix_map_plain(self):
313
c_map = self.make_one_deep_one_prefix_map()
314
self.assertEqualDiff(
317
" ('add',) 'initial add content'\n"
318
" ('adh',) 'initial adh content'\n"
319
" ('adl',) 'initial adl content'\n"
321
" ('bbb',) 'initial bbb content'\n",
324
def test_one_deep_one_prefix_map_16(self):
325
c_map = self.make_one_deep_one_prefix_map(
326
search_key_func=chk_map._search_key_16)
327
self.assertEqualDiff(
330
" ('bbb',) 'initial bbb content'\n"
332
" ('add',) 'initial add content'\n"
333
" ('adh',) 'initial adh content'\n"
334
" ('adl',) 'initial adl content'\n",
338
class TestMap(TestCaseWithStore):
340
    def assertHasABMap(self, chk_bytes):
        ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
        ab_sha1 = osutils.sha_string(ab_leaf_bytes)
        self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
        root_key = ('sha1:' + ab_sha1,)
        self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

    def assertHasEmptyMap(self, chk_bytes):
        empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
        empty_sha1 = osutils.sha_string(empty_leaf_bytes)
        self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
        root_key = ('sha1:' + empty_sha1,)
        self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

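    # A note on the raw 'chkleaf:' strings above (inferred from the
    # LeafNode.deserialise tests later in this file, not a normative spec):
    # the serialised form is the header line, then maximum_size, key_width and
    # item count on their own lines, then the common key prefix, then one
    # entry per item giving the rest of the key (elements NUL-joined), a NUL,
    # the number of value lines, and the value itself.  So
    #   'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
    # is an unlimited-size, width-1 leaf holding the single item ('a',) => 'b'.
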
    def assertMapLayoutEqual(self, map_one, map_two):
        """Assert that the internal structure is identical between the maps."""
        map_one._ensure_root()
        node_one_stack = [map_one._root_node]
        map_two._ensure_root()
        node_two_stack = [map_two._root_node]
        while node_one_stack:
            node_one = node_one_stack.pop()
            node_two = node_two_stack.pop()
            if node_one.__class__ != node_two.__class__:
                self.assertEqualDiff(map_one._dump_tree(include_keys=True),
                                     map_two._dump_tree(include_keys=True))
            self.assertEqual(node_one._search_prefix,
                             node_two._search_prefix)
            if isinstance(node_one, InternalNode):
                # Internal nodes must have identical references
                self.assertEqual(sorted(node_one._items.keys()),
                                 sorted(node_two._items.keys()))
                node_one_stack.extend([n for n, _ in
                                       node_one._iter_nodes(map_one._store)])
                node_two_stack.extend([n for n, _ in
                                       node_two._iter_nodes(map_two._store)])
            else:
                # Leaf nodes must have identical contents
                self.assertEqual(node_one._items, node_two._items)
        self.assertEquals([], node_two_stack)
383
def assertCanonicalForm(self, chkmap):
384
"""Assert that the chkmap is in 'canonical' form.
386
We do this by adding all of the key value pairs from scratch, both in
387
forward order and reverse order, and assert that the final tree layout
390
items = list(chkmap.iteritems())
391
map_forward = chk_map.CHKMap(None, None)
392
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
393
for key, value in items:
394
map_forward.map(key, value)
395
self.assertMapLayoutEqual(map_forward, chkmap)
396
map_reverse = chk_map.CHKMap(None, None)
397
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
398
for key, value in reversed(items):
399
map_reverse.map(key, value)
400
self.assertMapLayoutEqual(map_reverse, chkmap)
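
    # "Canonical form" here means the tree shape depends only on the mapping's
    # content, not on the order the items were added; that is also what
    # test_apply_delta_is_deterministic below checks via identical root keys.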
402
def test_assert_map_layout_equal(self):
403
store = self.get_chk_bytes()
404
map_one = CHKMap(store, None)
405
map_one._root_node.set_maximum_size(20)
406
map_two = CHKMap(store, None)
407
map_two._root_node.set_maximum_size(20)
408
self.assertMapLayoutEqual(map_one, map_two)
409
map_one.map('aaa', 'value')
410
self.assertRaises(AssertionError,
411
self.assertMapLayoutEqual, map_one, map_two)
412
map_two.map('aaa', 'value')
413
self.assertMapLayoutEqual(map_one, map_two)
414
        # Split the tree, so we ensure that internal nodes and leaf nodes are
        # also checked.
map_one.map('aab', 'value')
417
self.assertIsInstance(map_one._root_node, InternalNode)
418
self.assertRaises(AssertionError,
419
self.assertMapLayoutEqual, map_one, map_two)
420
map_two.map('aab', 'value')
421
self.assertMapLayoutEqual(map_one, map_two)
422
map_one.map('aac', 'value')
423
self.assertRaises(AssertionError,
424
self.assertMapLayoutEqual, map_one, map_two)
425
self.assertCanonicalForm(map_one)
427
def test_from_dict_empty(self):
428
chk_bytes = self.get_chk_bytes()
429
root_key = CHKMap.from_dict(chk_bytes, {})
430
# Check the data was saved and inserted correctly.
431
expected_root_key = self.assertHasEmptyMap(chk_bytes)
432
self.assertEqual(expected_root_key, root_key)
434
def test_from_dict_ab(self):
435
chk_bytes = self.get_chk_bytes()
436
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
437
# Check the data was saved and inserted correctly.
438
expected_root_key = self.assertHasABMap(chk_bytes)
439
self.assertEqual(expected_root_key, root_key)
441
def test_apply_empty_ab(self):
442
# applying a delta (None, "a", "b") to an empty chkmap generates the
443
# same map as from_dict_ab.
444
chk_bytes = self.get_chk_bytes()
445
root_key = CHKMap.from_dict(chk_bytes, {})
446
chkmap = CHKMap(chk_bytes, root_key)
447
new_root = chkmap.apply_delta([(None, "a", "b")])
448
# Check the data was saved and inserted correctly.
449
expected_root_key = self.assertHasABMap(chk_bytes)
450
self.assertEqual(expected_root_key, new_root)
451
        # The update should have left us with an in memory root node, with an
        # updated key.
self.assertEqual(new_root, chkmap._root_node._key)
455
def test_apply_ab_empty(self):
456
        # applying a delta ("a", None, None) to a map with 'a' in it generates
        # an empty map.
chk_bytes = self.get_chk_bytes()
459
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
460
chkmap = CHKMap(chk_bytes, root_key)
461
new_root = chkmap.apply_delta([(("a",), None, None)])
462
# Check the data was saved and inserted correctly.
463
expected_root_key = self.assertHasEmptyMap(chk_bytes)
464
self.assertEqual(expected_root_key, new_root)
465
        # The update should have left us with an in memory root node, with an
        # updated key.
self.assertEqual(new_root, chkmap._root_node._key)
469
def test_apply_delta_is_deterministic(self):
470
chk_bytes = self.get_chk_bytes()
471
chkmap1 = CHKMap(chk_bytes, None)
472
chkmap1._root_node.set_maximum_size(10)
473
chkmap1.apply_delta([(None, ('aaa',), 'common'),
474
(None, ('bba',), 'target2'),
475
(None, ('bbb',), 'common')])
476
root_key1 = chkmap1._save()
477
self.assertCanonicalForm(chkmap1)
479
chkmap2 = CHKMap(chk_bytes, None)
480
chkmap2._root_node.set_maximum_size(10)
481
chkmap2.apply_delta([(None, ('bbb',), 'common'),
482
(None, ('bba',), 'target2'),
483
(None, ('aaa',), 'common')])
484
root_key2 = chkmap2._save()
485
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
486
chkmap2._dump_tree(include_keys=True))
487
self.assertEqual(root_key1, root_key2)
488
self.assertCanonicalForm(chkmap2)
490
def test_stable_splitting(self):
491
store = self.get_chk_bytes()
492
chkmap = CHKMap(store, None)
493
# Should fit 2 keys per LeafNode
494
chkmap._root_node.set_maximum_size(35)
495
chkmap.map(('aaa',), 'v')
496
self.assertEqualDiff("'' LeafNode\n"
499
chkmap.map(('aab',), 'v')
500
self.assertEqualDiff("'' LeafNode\n"
504
self.assertCanonicalForm(chkmap)
506
# Creates a new internal node, and splits the others into leaves
507
chkmap.map(('aac',), 'v')
508
self.assertEqualDiff("'' InternalNode\n"
516
self.assertCanonicalForm(chkmap)
518
# Splits again, because it can't fit in the current structure
519
chkmap.map(('bbb',), 'v')
520
self.assertEqualDiff("'' InternalNode\n"
521
" 'a' InternalNode\n"
531
self.assertCanonicalForm(chkmap)
533
def test_map_splits_with_longer_key(self):
534
store = self.get_chk_bytes()
535
chkmap = CHKMap(store, None)
536
# Should fit 1 key per LeafNode
537
chkmap._root_node.set_maximum_size(10)
538
chkmap.map(('aaa',), 'v')
539
chkmap.map(('aaaa',), 'v')
540
self.assertCanonicalForm(chkmap)
541
self.assertIsInstance(chkmap._root_node, InternalNode)
543
def test_with_linefeed_in_key(self):
544
store = self.get_chk_bytes()
545
chkmap = CHKMap(store, None)
546
# Should fit 1 key per LeafNode
547
chkmap._root_node.set_maximum_size(10)
548
chkmap.map(('a\ra',), 'val1')
549
chkmap.map(('a\rb',), 'val2')
550
chkmap.map(('ac',), 'val3')
551
self.assertCanonicalForm(chkmap)
552
self.assertEqualDiff("'' InternalNode\n"
553
" 'a\\r' InternalNode\n"
554
" 'a\\ra' LeafNode\n"
555
" ('a\\ra',) 'val1'\n"
556
" 'a\\rb' LeafNode\n"
557
" ('a\\rb',) 'val2'\n"
561
# We should also successfully serialise and deserialise these items
562
root_key = chkmap._save()
563
chkmap = CHKMap(store, root_key)
564
self.assertEqualDiff("'' InternalNode\n"
565
" 'a\\r' InternalNode\n"
566
" 'a\\ra' LeafNode\n"
567
" ('a\\ra',) 'val1'\n"
568
" 'a\\rb' LeafNode\n"
569
" ('a\\rb',) 'val2'\n"
574
def test_deep_splitting(self):
575
store = self.get_chk_bytes()
576
chkmap = CHKMap(store, None)
577
# Should fit 2 keys per LeafNode
578
chkmap._root_node.set_maximum_size(40)
579
chkmap.map(('aaaaaaaa',), 'v')
580
chkmap.map(('aaaaabaa',), 'v')
581
self.assertEqualDiff("'' LeafNode\n"
582
" ('aaaaaaaa',) 'v'\n"
583
" ('aaaaabaa',) 'v'\n",
585
chkmap.map(('aaabaaaa',), 'v')
586
chkmap.map(('aaababaa',), 'v')
587
self.assertEqualDiff("'' InternalNode\n"
589
" ('aaaaaaaa',) 'v'\n"
590
" ('aaaaabaa',) 'v'\n"
592
" ('aaabaaaa',) 'v'\n"
593
" ('aaababaa',) 'v'\n",
595
chkmap.map(('aaabacaa',), 'v')
596
chkmap.map(('aaabadaa',), 'v')
597
self.assertEqualDiff("'' InternalNode\n"
599
" ('aaaaaaaa',) 'v'\n"
600
" ('aaaaabaa',) 'v'\n"
601
" 'aaab' InternalNode\n"
602
" 'aaabaa' LeafNode\n"
603
" ('aaabaaaa',) 'v'\n"
604
" 'aaabab' LeafNode\n"
605
" ('aaababaa',) 'v'\n"
606
" 'aaabac' LeafNode\n"
607
" ('aaabacaa',) 'v'\n"
608
" 'aaabad' LeafNode\n"
609
" ('aaabadaa',) 'v'\n",
611
chkmap.map(('aaababba',), 'val')
612
chkmap.map(('aaababca',), 'val')
613
self.assertEqualDiff("'' InternalNode\n"
615
" ('aaaaaaaa',) 'v'\n"
616
" ('aaaaabaa',) 'v'\n"
617
" 'aaab' InternalNode\n"
618
" 'aaabaa' LeafNode\n"
619
" ('aaabaaaa',) 'v'\n"
620
" 'aaabab' InternalNode\n"
621
" 'aaababa' LeafNode\n"
622
" ('aaababaa',) 'v'\n"
623
" 'aaababb' LeafNode\n"
624
" ('aaababba',) 'val'\n"
625
" 'aaababc' LeafNode\n"
626
" ('aaababca',) 'val'\n"
627
" 'aaabac' LeafNode\n"
628
" ('aaabacaa',) 'v'\n"
629
" 'aaabad' LeafNode\n"
630
" ('aaabadaa',) 'v'\n",
632
# Now we add a node that should fit around an existing InternalNode,
633
# but has a slightly different key prefix, which causes a new
635
chkmap.map(('aaabDaaa',), 'v')
636
self.assertEqualDiff("'' InternalNode\n"
638
" ('aaaaaaaa',) 'v'\n"
639
" ('aaaaabaa',) 'v'\n"
640
" 'aaab' InternalNode\n"
641
" 'aaabD' LeafNode\n"
642
" ('aaabDaaa',) 'v'\n"
643
" 'aaaba' InternalNode\n"
644
" 'aaabaa' LeafNode\n"
645
" ('aaabaaaa',) 'v'\n"
646
" 'aaabab' InternalNode\n"
647
" 'aaababa' LeafNode\n"
648
" ('aaababaa',) 'v'\n"
649
" 'aaababb' LeafNode\n"
650
" ('aaababba',) 'val'\n"
651
" 'aaababc' LeafNode\n"
652
" ('aaababca',) 'val'\n"
653
" 'aaabac' LeafNode\n"
654
" ('aaabacaa',) 'v'\n"
655
" 'aaabad' LeafNode\n"
656
" ('aaabadaa',) 'v'\n",
659
def test_map_collapses_if_size_changes(self):
660
store = self.get_chk_bytes()
661
chkmap = CHKMap(store, None)
662
# Should fit 2 keys per LeafNode
663
chkmap._root_node.set_maximum_size(35)
664
chkmap.map(('aaa',), 'v')
665
chkmap.map(('aab',), 'very long value that splits')
666
self.assertEqualDiff("'' InternalNode\n"
670
" ('aab',) 'very long value that splits'\n",
672
self.assertCanonicalForm(chkmap)
673
# Now changing the value to something small should cause a rebuild
674
chkmap.map(('aab',), 'v')
675
self.assertEqualDiff("'' LeafNode\n"
679
self.assertCanonicalForm(chkmap)
681
def test_map_double_deep_collapses(self):
682
store = self.get_chk_bytes()
683
chkmap = CHKMap(store, None)
684
# Should fit 3 small keys per LeafNode
685
chkmap._root_node.set_maximum_size(40)
686
chkmap.map(('aaa',), 'v')
687
chkmap.map(('aab',), 'very long value that splits')
688
chkmap.map(('abc',), 'v')
689
self.assertEqualDiff("'' InternalNode\n"
690
" 'aa' InternalNode\n"
694
" ('aab',) 'very long value that splits'\n"
698
chkmap.map(('aab',), 'v')
699
self.assertCanonicalForm(chkmap)
700
self.assertEqualDiff("'' LeafNode\n"
706
def test_stable_unmap(self):
707
store = self.get_chk_bytes()
708
chkmap = CHKMap(store, None)
709
# Should fit 2 keys per LeafNode
710
chkmap._root_node.set_maximum_size(35)
711
chkmap.map(('aaa',), 'v')
712
chkmap.map(('aab',), 'v')
713
self.assertEqualDiff("'' LeafNode\n"
717
# Creates a new internal node, and splits the others into leaves
718
chkmap.map(('aac',), 'v')
719
self.assertEqualDiff("'' InternalNode\n"
727
self.assertCanonicalForm(chkmap)
728
# Now lets unmap one of the keys, and assert that we collapse the
730
chkmap.unmap(('aac',))
731
self.assertEqualDiff("'' LeafNode\n"
735
self.assertCanonicalForm(chkmap)
737
def test_unmap_double_deep(self):
738
store = self.get_chk_bytes()
739
chkmap = CHKMap(store, None)
740
# Should fit 3 keys per LeafNode
741
chkmap._root_node.set_maximum_size(40)
742
chkmap.map(('aaa',), 'v')
743
chkmap.map(('aaab',), 'v')
744
chkmap.map(('aab',), 'very long value')
745
chkmap.map(('abc',), 'v')
746
self.assertEqualDiff("'' InternalNode\n"
747
" 'aa' InternalNode\n"
752
" ('aab',) 'very long value'\n"
756
# Removing the 'aab' key should cause everything to collapse back to a
758
chkmap.unmap(('aab',))
759
self.assertEqualDiff("'' LeafNode\n"
765
def test_unmap_double_deep_non_empty_leaf(self):
766
store = self.get_chk_bytes()
767
chkmap = CHKMap(store, None)
768
# Should fit 3 keys per LeafNode
769
chkmap._root_node.set_maximum_size(40)
770
chkmap.map(('aaa',), 'v')
771
chkmap.map(('aab',), 'long value')
772
chkmap.map(('aabb',), 'v')
773
chkmap.map(('abc',), 'v')
774
self.assertEqualDiff("'' InternalNode\n"
775
" 'aa' InternalNode\n"
779
" ('aab',) 'long value'\n"
784
# Removing the 'aab' key should cause everything to collapse back to a
786
chkmap.unmap(('aab',))
787
self.assertEqualDiff("'' LeafNode\n"
793
def test_unmap_with_known_internal_node_doesnt_page(self):
794
store = self.get_chk_bytes()
795
chkmap = CHKMap(store, None)
796
# Should fit 3 keys per LeafNode
797
chkmap._root_node.set_maximum_size(30)
798
chkmap.map(('aaa',), 'v')
799
chkmap.map(('aab',), 'v')
800
chkmap.map(('aac',), 'v')
801
chkmap.map(('abc',), 'v')
802
chkmap.map(('acd',), 'v')
803
self.assertEqualDiff("'' InternalNode\n"
804
" 'aa' InternalNode\n"
816
# Save everything to the map, and start over
817
chkmap = CHKMap(store, chkmap._save())
818
# Mapping an 'aa' key loads the internal node, but should not map the
819
# 'ab' and 'ac' nodes
820
chkmap.map(('aad',), 'v')
821
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
822
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
823
self.assertIsInstance(chkmap._root_node._items['ac'], tuple)
824
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
826
chkmap.unmap(('acd',))
827
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
828
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
830
def test_unmap_without_fitting_doesnt_page_in(self):
831
store = self.get_chk_bytes()
832
chkmap = CHKMap(store, None)
833
# Should fit 2 keys per LeafNode
834
chkmap._root_node.set_maximum_size(20)
835
chkmap.map(('aaa',), 'v')
836
chkmap.map(('aab',), 'v')
837
self.assertEqualDiff("'' InternalNode\n"
843
# Save everything to the map, and start over
844
chkmap = CHKMap(store, chkmap._save())
845
chkmap.map(('aac',), 'v')
846
chkmap.map(('aad',), 'v')
847
chkmap.map(('aae',), 'v')
848
chkmap.map(('aaf',), 'v')
849
# At this point, the previous nodes should not be paged in, but the
850
# newly added nodes would be
851
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
852
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
853
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
854
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
855
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
856
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
857
# Now unmapping one of the new nodes will use only the already-paged-in
858
# nodes to determine that we don't need to do more.
859
chkmap.unmap(('aaf',))
860
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
861
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
862
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
863
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
864
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
866
def test_unmap_pages_in_if_necessary(self):
867
store = self.get_chk_bytes()
868
chkmap = CHKMap(store, None)
869
# Should fit 2 keys per LeafNode
870
chkmap._root_node.set_maximum_size(30)
871
chkmap.map(('aaa',), 'val')
872
chkmap.map(('aab',), 'val')
873
chkmap.map(('aac',), 'val')
874
self.assertEqualDiff("'' InternalNode\n"
882
root_key = chkmap._save()
883
# Save everything to the map, and start over
884
chkmap = CHKMap(store, root_key)
885
chkmap.map(('aad',), 'v')
886
# At this point, the previous nodes should not be paged in, but the
887
# newly added node would be
888
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
889
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
890
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
891
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
892
# Unmapping the new node will check the existing nodes to see if they
894
# Clear the page cache so we ensure we have to read all the children
895
chk_map._page_cache.clear()
896
chkmap.unmap(('aad',))
897
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
898
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
899
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
901
def test_unmap_pages_in_from_page_cache(self):
902
store = self.get_chk_bytes()
903
chkmap = CHKMap(store, None)
904
# Should fit 2 keys per LeafNode
905
chkmap._root_node.set_maximum_size(30)
906
chkmap.map(('aaa',), 'val')
907
chkmap.map(('aab',), 'val')
908
chkmap.map(('aac',), 'val')
909
root_key = chkmap._save()
910
# Save everything to the map, and start over
911
chkmap = CHKMap(store, root_key)
912
chkmap.map(('aad',), 'val')
913
self.assertEqualDiff("'' InternalNode\n"
923
# Save everything to the map, start over after _dump_tree
924
chkmap = CHKMap(store, root_key)
925
chkmap.map(('aad',), 'v')
926
# At this point, the previous nodes should not be paged in, but the
927
# newly added node would be
928
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
929
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
930
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
931
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
932
# Now clear the page cache, and only include 2 of the children in the
934
aab_key = chkmap._root_node._items['aab']
935
aab_bytes = chk_map._page_cache[aab_key]
936
aac_key = chkmap._root_node._items['aac']
937
aac_bytes = chk_map._page_cache[aac_key]
938
chk_map._page_cache.clear()
939
chk_map._page_cache[aab_key] = aab_bytes
940
chk_map._page_cache[aac_key] = aac_bytes
942
# Unmapping the new node will check the nodes from the page cache
943
# first, and not have to read in 'aaa'
944
chkmap.unmap(('aad',))
945
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
946
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
947
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
949
def test_unmap_uses_existing_items(self):
950
store = self.get_chk_bytes()
951
chkmap = CHKMap(store, None)
952
# Should fit 2 keys per LeafNode
953
chkmap._root_node.set_maximum_size(30)
954
chkmap.map(('aaa',), 'val')
955
chkmap.map(('aab',), 'val')
956
chkmap.map(('aac',), 'val')
957
root_key = chkmap._save()
958
# Save everything to the map, and start over
959
chkmap = CHKMap(store, root_key)
960
chkmap.map(('aad',), 'val')
961
chkmap.map(('aae',), 'val')
962
chkmap.map(('aaf',), 'val')
963
# At this point, the previous nodes should not be paged in, but the
964
# newly added node would be
965
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
966
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
967
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
968
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
969
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
970
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
972
# Unmapping a new node will see the other nodes that are already in
973
# memory, and not need to page in anything else
974
chkmap.unmap(('aad',))
975
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
976
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
977
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
978
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
979
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
981
def test_iter_changes_empty_ab(self):
982
        # Asking for changes from an empty dict to a dict with keys returns
        # all the new keys.
basis = self._get_map({}, maximum_size=10)
985
target = self._get_map(
986
{('a',): 'content here', ('b',): 'more content'},
987
chk_bytes=basis._store, maximum_size=10)
988
self.assertEqual([(('a',), None, 'content here'),
989
(('b',), None, 'more content')],
990
sorted(list(target.iter_changes(basis))))
992
def test_iter_changes_ab_empty(self):
993
        # Asking for changes from a dict with keys to an empty dict returns
        # all the keys as deleted.
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                               maximum_size=10)
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
998
self.assertEqual([(('a',), 'content here', None),
999
(('b',), 'more content', None)],
1000
sorted(list(target.iter_changes(basis))))
1002
def test_iter_changes_empty_empty_is_empty(self):
1003
basis = self._get_map({}, maximum_size=10)
1004
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1005
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1007
def test_iter_changes_ab_ab_is_empty(self):
1008
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                               maximum_size=10)
target = self._get_map(
1011
{('a',): 'content here', ('b',): 'more content'},
1012
chk_bytes=basis._store, maximum_size=10)
1013
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1015
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1016
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                               maximum_size=10)
target = self._get_map(
1019
{('a',): 'content here', ('b',): 'more content'},
1020
chk_bytes=basis._store, maximum_size=10)
1021
list(target.iter_changes(basis))
1022
self.assertIsInstance(target._root_node, tuple)
1023
self.assertIsInstance(basis._root_node, tuple)
1025
def test_iter_changes_ab_ab_changed_values_shown(self):
1026
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                               maximum_size=10)
target = self._get_map(
1029
{('a',): 'content here', ('b',): 'different content'},
1030
chk_bytes=basis._store, maximum_size=10)
1031
result = sorted(list(target.iter_changes(basis)))
1032
self.assertEqual([(('b',), 'more content', 'different content')],
                         result)

def test_iter_changes_mixed_node_length(self):
1036
# When one side has different node lengths than the other, common
1037
        # but different keys still need to be shown, and new-and-old included
        # appropriately.
# aaa - common unaltered
1040
# aab - common altered
1044
# aaa to be not loaded (later test)
1045
# aab, b, at to be returned.
1046
        # basis splits at byte 0,1,2, aaa is common, b is basis only
1047
basis_dict = {('aaa',): 'foo bar',
1048
('aab',): 'common altered a', ('b',): 'foo bar b'}
1049
# target splits at byte 1,2, at is target only
1050
target_dict = {('aaa',): 'foo bar',
1051
('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
1054
(('at',), None, 'foo bar t'),
1055
            (('b',), 'foo bar b', None),
            ]
basis = self._get_map(basis_dict, maximum_size=10)
1058
target = self._get_map(target_dict, maximum_size=10,
1059
chk_bytes=basis._store)
1060
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1062
def test_iter_changes_common_pages_not_loaded(self):
1063
# aaa - common unaltered
1064
# aab - common altered
1068
# aaa to be not loaded
1069
# aaa not to be in result.
1070
basis_dict = {('aaa',): 'foo bar',
1071
('aab',): 'common altered a', ('b',): 'foo bar b'}
1072
# target splits at byte 1, at is target only
1073
target_dict = {('aaa',): 'foo bar',
1074
('aab',): 'common altered b', ('at',): 'foo bar t'}
1075
basis = self._get_map(basis_dict, maximum_size=10)
1076
target = self._get_map(target_dict, maximum_size=10,
1077
chk_bytes=basis._store)
1078
basis_get = basis._store.get_record_stream
1079
def get_record_stream(keys, order, fulltext):
1080
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1081
self.fail("'aaa' pointer was followed %r" % keys)
1082
return basis_get(keys, order, fulltext)
1083
basis._store.get_record_stream = get_record_stream
1084
result = sorted(list(target.iter_changes(basis)))
1085
for change in result:
1086
if change[0] == ('aaa',):
1087
self.fail("Found unexpected change: %s" % change)
1089
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
1090
        # Within a leaf there are no hashes to exclude keys, so make sure multi
1091
# value leaf nodes are handled well.
1092
basis_dict = {('aaa',): 'foo bar',
1093
('aab',): 'common altered a', ('b',): 'foo bar b'}
1094
target_dict = {('aaa',): 'foo bar',
1095
('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
1098
(('at',), None, 'foo bar t'),
1099
            (('b',), 'foo bar b', None),
            ]
basis = self._get_map(basis_dict)
1102
target = self._get_map(target_dict, chk_bytes=basis._store)
1103
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1105
def test_iteritems_empty(self):
1106
chk_bytes = self.get_chk_bytes()
1107
root_key = CHKMap.from_dict(chk_bytes, {})
1108
chkmap = CHKMap(chk_bytes, root_key)
1109
self.assertEqual([], list(chkmap.iteritems()))
1111
def test_iteritems_two_items(self):
1112
chk_bytes = self.get_chk_bytes()
1113
root_key = CHKMap.from_dict(chk_bytes,
1114
{"a":"content here", "b":"more content"})
1115
chkmap = CHKMap(chk_bytes, root_key)
1116
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
1117
sorted(list(chkmap.iteritems())))
1119
def test_iteritems_selected_one_of_two_items(self):
1120
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
1121
self.assertEqual({("a",): "content here"},
1122
self.to_dict(chkmap, [("a",)]))
1124
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1125
chkmap = self._get_map(
1126
{("a","a"):"content here", ("a", "b",):"more content",
1127
("b", ""): 'boring content'},
1128
maximum_size=10, key_width=2)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
1131
self.to_dict(chkmap, [("a",)]))
1133
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1134
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1135
self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
1136
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
1137
self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
1138
chkmap = self._get_map(
1139
{("a","a"):"content here", ("a", "b",):"more content",
1140
("b", ""): 'boring content'},
1141
maximum_size=10, key_width=2, search_key_func=search_key_func)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
1144
self.to_dict(chkmap, [("a",)]))
1146
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1147
chkmap = self._get_map(
1148
{("a","a"):"content here", ("a", "b",):"more content",
1149
("b", ""): 'boring content'}, key_width=2)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
1152
self.to_dict(chkmap, [("a",)]))
1154
def test___len__empty(self):
1155
chkmap = self._get_map({})
1156
self.assertEqual(0, len(chkmap))
1158
def test___len__2(self):
1159
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
1160
self.assertEqual(2, len(chkmap))
1162
def test_max_size_100_bytes_new(self):
1163
# When there is a 100 byte upper node limit, a tree is formed.
1164
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
1165
# We expect three nodes:
1166
# A root, with two children, and with two key prefixes - k1 to one, and
1167
# k2 to the other as our node splitting is only just being developed.
1168
# The maximum size should be embedded
1169
chkmap._ensure_root()
1170
self.assertEqual(100, chkmap._root_node.maximum_size)
1171
self.assertEqual(1, chkmap._root_node._key_width)
1172
# There should be two child nodes, and prefix of 2(bytes):
1173
self.assertEqual(2, len(chkmap._root_node._items))
1174
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
1175
# The actual nodes pointed at will change as serialisers change; so
1176
# here we test that the key prefix is correct; then load the nodes and
1177
# check they have the right pointed at key; whether they have the
1178
# pointed at value inline or not is also unrelated to this test so we
1179
        # don't check that in detail - rather we just check the aggregate
        # value.
        nodes = sorted(chkmap._root_node._items.items())
        ptr1 = nodes[0]
        ptr2 = nodes[1]
        self.assertEqual('k1', ptr1[0])
1185
self.assertEqual('k2', ptr2[0])
1186
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1187
self.assertIsInstance(node1, LeafNode)
1188
self.assertEqual(1, len(node1))
1189
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
1190
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1191
self.assertIsInstance(node2, LeafNode)
1192
self.assertEqual(1, len(node2))
1193
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
1194
# Having checked we have a good structure, check that the content is
1196
self.assertEqual(2, len(chkmap))
1197
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
1198
self.to_dict(chkmap))
1200
def test_init_root_is_LeafNode_new(self):
1201
chk_bytes = self.get_chk_bytes()
1202
chkmap = CHKMap(chk_bytes, None)
1203
self.assertIsInstance(chkmap._root_node, LeafNode)
1204
self.assertEqual({}, self.to_dict(chkmap))
1205
self.assertEqual(0, len(chkmap))
1207
def test_init_and_save_new(self):
1208
chk_bytes = self.get_chk_bytes()
1209
chkmap = CHKMap(chk_bytes, None)
1210
key = chkmap._save()
1211
leaf_node = LeafNode()
1212
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1214
def test_map_first_item_new(self):
1215
chk_bytes = self.get_chk_bytes()
1216
chkmap = CHKMap(chk_bytes, None)
1217
chkmap.map(("foo,",), "bar")
1218
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
1219
self.assertEqual(1, len(chkmap))
1220
key = chkmap._save()
1221
leaf_node = LeafNode()
1222
leaf_node.map(chk_bytes, ("foo,",), "bar")
1223
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1225
def test_unmap_last_item_root_is_leaf_new(self):
1226
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
1227
chkmap.unmap(("k1"*50,))
1228
chkmap.unmap(("k2"*50,))
1229
self.assertEqual(0, len(chkmap))
1230
self.assertEqual({}, self.to_dict(chkmap))
1231
key = chkmap._save()
1232
leaf_node = LeafNode()
1233
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1235
def test__dump_tree(self):
1236
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
1237
("bbb",): "value3",},
1239
self.assertEqualDiff("'' InternalNode\n"
1240
" 'a' InternalNode\n"
1242
" ('aaa',) 'value1'\n"
1244
" ('aab',) 'value2'\n"
1246
" ('bbb',) 'value3'\n",
1247
chkmap._dump_tree())
1248
self.assertEqualDiff("'' InternalNode\n"
1249
" 'a' InternalNode\n"
1251
" ('aaa',) 'value1'\n"
1253
" ('aab',) 'value2'\n"
1255
" ('bbb',) 'value3'\n",
1256
chkmap._dump_tree())
1257
self.assertEqualDiff(
1258
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1259
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1260
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1261
" ('aaa',) 'value1'\n"
1262
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1263
" ('aab',) 'value2'\n"
1264
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1265
" ('bbb',) 'value3'\n",
1266
chkmap._dump_tree(include_keys=True))
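
    # _dump_tree output, as exercised above: one line per node, indented by
    # depth, in the form "'prefix' NodeType", with each leaf item shown
    # beneath its node as "(key tuple) 'value'".  With include_keys=True every
    # node line also carries its storage key (or None when the node has not
    # been saved yet, as the next test shows).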
1268
def test__dump_tree_in_progress(self):
1269
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
                               maximum_size=10)
chkmap.map(('bbb',), 'value3')
1272
self.assertEqualDiff("'' InternalNode\n"
1273
" 'a' InternalNode\n"
1275
" ('aaa',) 'value1'\n"
1277
" ('aab',) 'value2'\n"
1279
" ('bbb',) 'value3'\n",
1280
chkmap._dump_tree())
1281
# For things that are updated by adding 'bbb', we don't have a sha key
1282
# for them yet, so they are listed as None
1283
self.assertEqualDiff(
1284
"'' InternalNode None\n"
1285
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1286
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1287
" ('aaa',) 'value1'\n"
1288
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1289
" ('aab',) 'value2'\n"
1290
" 'b' LeafNode None\n"
1291
" ('bbb',) 'value3'\n",
1292
chkmap._dump_tree(include_keys=True))


def _search_key_single(key):
    """A search key function that maps all nodes to the same value"""
    return 'x'


def _test_search_key(key):
    return 'test:' + '\x00'.join(key)
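

# A search key function maps a key tuple to the flat string the tree is
# actually navigated by.  The default ("plain") behaviour joins the elements
# with NUL bytes, while the hash-based variants spread keys out by hashing
# each element first.  The tests below pin down concrete values, e.g.
#   _test_search_key(('1', '2', '3'))      == 'test:1\x002\x003'
#   chk_map._search_key_16(('foo', 'bar')) == '8C736521\x0076FF8CAA'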
1303
class TestMapSearchKeys(TestCaseWithStore):
1305
def test_default_chk_map_uses_flat_search_key(self):
1306
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1307
self.assertEqual('1',
1308
chkmap._search_key_func(('1',)))
1309
self.assertEqual('1\x002',
1310
chkmap._search_key_func(('1', '2')))
1311
self.assertEqual('1\x002\x003',
1312
chkmap._search_key_func(('1', '2', '3')))
1314
def test_search_key_is_passed_to_root_node(self):
1315
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1316
search_key_func=_test_search_key)
1317
self.assertIs(_test_search_key, chkmap._search_key_func)
1318
self.assertEqual('test:1\x002\x003',
1319
chkmap._search_key_func(('1', '2', '3')))
1320
self.assertEqual('test:1\x002\x003',
1321
chkmap._root_node._search_key(('1', '2', '3')))
1323
def test_search_key_passed_via__ensure_root(self):
1324
chk_bytes = self.get_chk_bytes()
1325
chkmap = chk_map.CHKMap(chk_bytes, None,
1326
search_key_func=_test_search_key)
1327
root_key = chkmap._save()
1328
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1329
search_key_func=_test_search_key)
1330
chkmap._ensure_root()
1331
self.assertEqual('test:1\x002\x003',
1332
chkmap._root_node._search_key(('1', '2', '3')))
1334
def test_search_key_with_internal_node(self):
1335
chk_bytes = self.get_chk_bytes()
1336
chkmap = chk_map.CHKMap(chk_bytes, None,
1337
search_key_func=_test_search_key)
1338
chkmap._root_node.set_maximum_size(10)
1339
chkmap.map(('1',), 'foo')
1340
chkmap.map(('2',), 'bar')
1341
chkmap.map(('3',), 'baz')
1342
self.assertEqualDiff("'' InternalNode\n"
1343
" 'test:1' LeafNode\n"
1345
" 'test:2' LeafNode\n"
1347
" 'test:3' LeafNode\n"
1349
, chkmap._dump_tree())
1350
root_key = chkmap._save()
1351
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1352
search_key_func=_test_search_key)
1353
self.assertEqualDiff("'' InternalNode\n"
1354
" 'test:1' LeafNode\n"
1356
" 'test:2' LeafNode\n"
1358
" 'test:3' LeafNode\n"
1360
, chkmap._dump_tree())
1362
def test_search_key_16(self):
1363
chk_bytes = self.get_chk_bytes()
1364
chkmap = chk_map.CHKMap(chk_bytes, None,
1365
search_key_func=chk_map._search_key_16)
1366
chkmap._root_node.set_maximum_size(10)
1367
chkmap.map(('1',), 'foo')
1368
chkmap.map(('2',), 'bar')
1369
chkmap.map(('3',), 'baz')
1370
self.assertEqualDiff("'' InternalNode\n"
1377
, chkmap._dump_tree())
1378
root_key = chkmap._save()
1379
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1380
search_key_func=chk_map._search_key_16)
1381
# We can get the values back correctly
1382
self.assertEqual([(('1',), 'foo')],
1383
list(chkmap.iteritems([('1',)])))
1384
self.assertEqualDiff("'' InternalNode\n"
1391
, chkmap._dump_tree())
1393
def test_search_key_255(self):
1394
chk_bytes = self.get_chk_bytes()
1395
chkmap = chk_map.CHKMap(chk_bytes, None,
1396
search_key_func=chk_map._search_key_255)
1397
chkmap._root_node.set_maximum_size(10)
1398
chkmap.map(('1',), 'foo')
1399
chkmap.map(('2',), 'bar')
1400
chkmap.map(('3',), 'baz')
1401
self.assertEqualDiff("'' InternalNode\n"
1402
" '\\x1a' LeafNode\n"
1406
" '\\x83' LeafNode\n"
1408
, chkmap._dump_tree())
1409
root_key = chkmap._save()
1410
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1411
search_key_func=chk_map._search_key_255)
1412
# We can get the values back correctly
1413
self.assertEqual([(('1',), 'foo')],
1414
list(chkmap.iteritems([('1',)])))
1415
self.assertEqualDiff("'' InternalNode\n"
1416
" '\\x1a' LeafNode\n"
1420
" '\\x83' LeafNode\n"
1422
, chkmap._dump_tree())
1424
def test_search_key_collisions(self):
1425
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1426
search_key_func=_search_key_single)
1427
# The node will want to expand, but it cannot, because it knows that
1428
# all the keys must map to this node
1429
chkmap._root_node.set_maximum_size(20)
1430
chkmap.map(('1',), 'foo')
1431
chkmap.map(('2',), 'bar')
1432
chkmap.map(('3',), 'baz')
1433
self.assertEqualDiff("'' LeafNode\n"
1437
, chkmap._dump_tree())
1440
class TestSearchKeyFuncs(tests.TestCase):
1442
def assertSearchKey16(self, expected, key):
1443
self.assertEqual(expected, chk_map._search_key_16(key))
1445
def assertSearchKey255(self, expected, key):
1446
actual = chk_map._search_key_255(key)
1447
self.assertEqual(expected, actual, 'actual: %r' % (actual,))
1449
def test_simple_16(self):
1450
self.assertSearchKey16('8C736521', ('foo',))
1451
self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
1452
self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
1453
self.assertSearchKey16('ED82CD11', ('abcd',))
1455
def test_simple_255(self):
1456
self.assertSearchKey255('\x8cse!', ('foo',))
1457
self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
1458
self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
1459
        # The standard mapping for these would include '\n', so it should be
        # mapped to something else.
self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))
1463
def test_255_does_not_include_newline(self):
1464
# When mapping via _search_key_255, we should never have the '\n'
1465
# character, but all other 255 values should be present
        chars_used = set()
        for char_in in range(256):
1468
search_key = chk_map._search_key_255((chr(char_in),))
1469
chars_used.update(search_key)
1470
all_chars = set([chr(x) for x in range(256)])
1471
unused_chars = all_chars.symmetric_difference(chars_used)
1472
self.assertEqual(set('\n'), unused_chars)
1475
class TestLeafNode(TestCaseWithStore):
1477
def test_current_size_empty(self):
        node = LeafNode()
self.assertEqual(16, node._current_size())
1481
def test_current_size_size_changed(self):
        node = LeafNode()
node.set_maximum_size(10)
1484
self.assertEqual(17, node._current_size())
1486
def test_current_size_width_changed(self):
        node = LeafNode()
node._key_width = 10
1489
self.assertEqual(17, node._current_size())
1491
def test_current_size_items(self):
        node = LeafNode()
base_size = node._current_size()
1494
node.map(None, ("foo bar",), "baz")
1495
self.assertEqual(base_size + 14, node._current_size())
1497
def test_deserialise_empty(self):
1498
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1499
self.assertEqual(0, len(node))
1500
self.assertEqual(10, node.maximum_size)
1501
self.assertEqual(("sha1:1234",), node.key())
1502
self.assertIs(None, node._search_prefix)
1503
self.assertIs(None, node._common_serialised_prefix)
1505
def test_deserialise_items(self):
1506
node = LeafNode.deserialise(
1507
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1510
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1511
sorted(node.iteritems(None)))
1513
def test_deserialise_item_with_null_width_1(self):
1514
node = LeafNode.deserialise(
1515
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1518
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1519
sorted(node.iteritems(None)))
1521
def test_deserialise_item_with_null_width_2(self):
1522
node = LeafNode.deserialise(
1523
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1524
"quux\x00\x001\nblarh\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1527
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1528
sorted(node.iteritems(None)))
1530
def test_iteritems_selected_one_of_two_items(self):
1531
node = LeafNode.deserialise(
1532
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1535
self.assertEqual([(("quux",), "blarh")],
1536
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1538
def test_deserialise_item_with_common_prefix(self):
1539
node = LeafNode.deserialise(
1540
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1543
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1544
sorted(node.iteritems(None)))
1545
self.assertIs(chk_map._unknown, node._search_prefix)
1546
self.assertEqual('foo\x00', node._common_serialised_prefix)
1548
def test_deserialise_multi_line(self):
1549
node = LeafNode.deserialise(
1550
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
            ("sha1:1234",))
self.assertEqual(2, len(node))
1553
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1554
(("foo", "2"), "blarh\n"),
1555
], sorted(node.iteritems(None)))
1556
self.assertIs(chk_map._unknown, node._search_prefix)
1557
self.assertEqual('foo\x00', node._common_serialised_prefix)
1559
def test_key_new(self):
        node = LeafNode()
self.assertEqual(None, node.key())
1563
def test_key_after_map(self):
1564
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1565
node.map(None, ("foo bar",), "baz quux")
1566
self.assertEqual(None, node.key())
1568
def test_key_after_unmap(self):
1569
node = LeafNode.deserialise(
1570
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
node.unmap(None, ("foo bar",))
1573
self.assertEqual(None, node.key())
1575
def test_map_exceeding_max_size_only_entry_new(self):
        node = LeafNode()
node.set_maximum_size(10)
1578
result = node.map(None, ("foo bar",), "baz quux")
1579
self.assertEqual(("foo bar", [("", node)]), result)
1580
self.assertTrue(10 < node._current_size())
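
    # The (prefix, node_details) return value checked above is how map()
    # reports what happened (as observed in these tests, not documented here):
    # while everything still fits in this node, node_details is just
    # [("", self)]; once the size limit is exceeded with distinguishable keys,
    # it lists one replacement child per distinguishing prefix, as the next
    # test demonstrates.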
1582
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
        node = LeafNode()
node.set_maximum_size(10)
1585
node.map(None, ("foo bar",), "baz quux")
1586
prefix, result = list(node.map(None, ("blue",), "red"))
1587
self.assertEqual("", prefix)
1588
self.assertEqual(2, len(result))
1589
split_chars = set([result[0][0], result[1][0]])
1590
self.assertEqual(set(["f", "b"]), split_chars)
1591
nodes = dict(result)
        node = nodes["f"]
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1594
self.assertEqual(10, node.maximum_size)
1595
self.assertEqual(1, node._key_width)
        node = nodes["b"]
self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1598
self.assertEqual(10, node.maximum_size)
1599
self.assertEqual(1, node._key_width)
1601
def test_map_first(self):
        node = LeafNode()
result = node.map(None, ("foo bar",), "baz quux")
1604
self.assertEqual(("foo bar", [("", node)]), result)
1605
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1606
self.assertEqual(1, len(node))
1608
def test_map_second(self):
        node = LeafNode()
node.map(None, ("foo bar",), "baz quux")
1611
result = node.map(None, ("bingo",), "bango")
1612
self.assertEqual(("", [("", node)]), result)
1613
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1614
self.to_dict(node, None))
1615
self.assertEqual(2, len(node))
1617
def test_map_replacement(self):
        node = LeafNode()
node.map(None, ("foo bar",), "baz quux")
1620
result = node.map(None, ("foo bar",), "bango")
1621
self.assertEqual(("foo bar", [("", node)]), result)
1622
self.assertEqual({("foo bar",): "bango"},
1623
self.to_dict(node, None))
1624
self.assertEqual(1, len(node))
1626
def test_serialise_empty(self):
1627
store = self.get_chk_bytes()
        node = LeafNode()
node.set_maximum_size(10)
1630
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1631
self.assertEqual([expected_key],
1632
list(node.serialise(store)))
1633
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1634
self.assertEqual(expected_key, node.key())
1636
def test_serialise_items(self):
1637
store = self.get_chk_bytes()
        node = LeafNode()
node.set_maximum_size(10)
1640
node.map(None, ("foo bar",), "baz quux")
1641
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1642
self.assertEqual('foo bar', node._common_serialised_prefix)
1643
self.assertEqual([expected_key],
1644
list(node.serialise(store)))
1645
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1646
self.read_bytes(store, expected_key))
1647
self.assertEqual(expected_key, node.key())
1649
def test_unique_serialised_prefix_empty_new(self):
        node = LeafNode()
self.assertIs(None, node._compute_search_prefix())
1653
def test_unique_serialised_prefix_one_item_new(self):
        node = LeafNode()
node.map(None, ("foo bar", "baz"), "baz quux")
1656
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1658
def test_unmap_missing(self):
        node = LeafNode()
self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1662
def test_unmap_present(self):
        node = LeafNode()
node.map(None, ("foo bar",), "baz quux")
1665
result = node.unmap(None, ("foo bar",))
1666
self.assertEqual(node, result)
1667
self.assertEqual({}, self.to_dict(node, None))
1668
self.assertEqual(0, len(node))
1670
def test_map_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
node.map(None, ("foo bar", "baz"), "baz quux")
1674
self.assertEqual('foo bar\x00baz', node._search_prefix)
1675
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1676
node.map(None, ("foo bar", "bing"), "baz quux")
1677
self.assertEqual('foo bar\x00b', node._search_prefix)
1678
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1679
node.map(None, ("fool", "baby"), "baz quux")
1680
self.assertEqual('foo', node._search_prefix)
1681
self.assertEqual('foo', node._common_serialised_prefix)
1682
node.map(None, ("foo bar", "baz"), "replaced")
1683
self.assertEqual('foo', node._search_prefix)
1684
self.assertEqual('foo', node._common_serialised_prefix)
1685
node.map(None, ("very", "different"), "value")
1686
self.assertEqual('', node._search_prefix)
1687
self.assertEqual('', node._common_serialised_prefix)
1689
def test_unmap_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
node.map(None, ("foo bar", "baz"), "baz quux")
1693
node.map(None, ("foo bar", "bing"), "baz quux")
1694
node.map(None, ("fool", "baby"), "baz quux")
1695
node.map(None, ("very", "different"), "value")
1696
self.assertEqual('', node._search_prefix)
1697
self.assertEqual('', node._common_serialised_prefix)
1698
node.unmap(None, ("very", "different"))
1699
self.assertEqual("foo", node._search_prefix)
1700
self.assertEqual("foo", node._common_serialised_prefix)
1701
node.unmap(None, ("fool", "baby"))
1702
self.assertEqual('foo bar\x00b', node._search_prefix)
1703
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1704
node.unmap(None, ("foo bar", "baz"))
1705
self.assertEqual('foo bar\x00bing', node._search_prefix)
1706
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1707
node.unmap(None, ("foo bar", "bing"))
1708
self.assertEqual(None, node._search_prefix)
1709
self.assertEqual(None, node._common_serialised_prefix)
1712
class TestInternalNode(TestCaseWithStore):
1714
def test_add_node_empty_new(self):
1715
node = InternalNode('fo')
        child = LeafNode()
child.set_maximum_size(100)
1718
child.map(None, ("foo",), "bar")
1719
node.add_node("foo", child)
1720
# Note that node isn't strictly valid now as a tree (only one child),
1721
        # but that's ok for this test.
1722
# The first child defines the node's width:
1723
self.assertEqual(3, node._node_width)
1724
# We should be able to iterate over the contents without doing IO.
1725
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
1726
# The length should be known:
1727
self.assertEqual(1, len(node))
1728
# serialising the node should serialise the child and the node.
1729
chk_bytes = self.get_chk_bytes()
1730
keys = list(node.serialise(chk_bytes))
1731
child_key = child.serialise(chk_bytes)[0]
1733
[child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
1735
# We should be able to access deserialised content.
1736
bytes = self.read_bytes(chk_bytes, keys[1])
1737
node = chk_map._deserialise(bytes, keys[1], None)
1738
self.assertEqual(1, len(node))
1739
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
1740
self.assertEqual(3, node._node_width)
1742

    def test_add_node_resets_key_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node("fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = (('foo',), ('bar',), ('cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # correctly filtered.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "val")
        child.map(None, ("baz",), "val")
        node.add_node("b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # ignored.
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both preserved.
            self.assertEqual(2, len(node_key_filter))

    def make_fo_fa_node(self):
        node = InternalNode('f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node('fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("far",), "val")
        child.map(None, ("faz",), "val")
        node.add_node("fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [('bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove(('b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [('foo',), ('faa',), ('baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        node.add_node("s", leaf2)
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
                         sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['f'] = None
        node.add_node("s", leaf2)
        self.assertEqual([(('strange',), 'beast')],
                         sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, ('strange',), 'beast')
        self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
        self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
        node.add_node("\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['\xbe'] = None
        node.add_node("\x85", leaf2)
        self.assertEqual([(('strange',), 'beast')],
                         sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([('missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
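        # (Children that have not been paged in are stored as bare key
        # tuples, so counting tuple values counts the not-yet-loaded
        # children.)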
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) == tuple]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, ('k3',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the
        # internal node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) == tuple]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed (it's a special corner case: splitting with only one key
        # leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items['k22']._key
        k23_key = child._items['k23']._key
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a',)))

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, ('k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items['k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k22',): 'bar'},
                         self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items['k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, ('k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap(('k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())


# map -> fits - done
# map -> doesn't fit - shrink from left till fits
# key data to return: the common prefix, new nodes.

# unmap -> how to tell if siblings can be combined.
# combining leaf nodes means expanding the prefix to the left; so gather the
# size of all the leaf nodes addressed by expanding the prefix by 1; if any
# adjacent node is an internal node, we know that is a dense subtree - we
# can't combine. Otherwise, as soon as the sum of serialised values exceeds
# the split threshold, we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count

# variable length prefixes? -> later; start with fixed width to get something going
# map -> fits - update pointer to leaf
# return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
# create internal nodes for all the partitions, return list of unique
# prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count

# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
#   code)
# map inits as empty leafnode.

# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A, B, AA, AB, AC, AD, BA
# AB - split, but we want to end up with AB, BA, in one node, with
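
# A rough, informal illustration of the prefix behaviour sketched above. The
# helper below is an example added for explanation only (its name is not part
# of the original suite, and no test calls it): mapping progressively more
# divergent keys into a LeafNode shrinks its common serialised prefix, which
# is what test_map_maintains_common_prefixes exercises in detail.
def _example_prefix_shrinking():
    node = LeafNode()
    node.map(None, ('foo bar',), 'value1')
    # With a single key, the whole serialised key is the common prefix.
    assert node._common_serialised_prefix == 'foo bar'
    node.map(None, ('fool',), 'value2')
    # 'foo bar' and 'fool' only share 'foo'.
    assert node._common_serialised_prefix == 'foo'
    node.map(None, ('zap',), 'value3')
    # Nothing in common any more.
    assert node._common_serialised_prefix == ''
    return node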


class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
                                        new_roots, old_roots, search_key_func)
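
    # In the tests below, _read_all_roots() yields the root records that are
    # definitely needed, while queueing up follow-on work: _new_queue holds
    # new chk pages still to be walked, _new_item_queue holds leaf items
    # already known to be new, and _old_queue holds old pages that still need
    # to be read so unchanged content can be filtered out.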

    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual(set([key1]), diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old set.
        self.assertEqual([(('aaa',), 'new aaa content')],
                         diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides, both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_c = c_map._root_node._items['c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map(('ccc',), 'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or key3_a
        self.assertEqual([key2_a, key3_c], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual([key1_a, key1_c], diff._old_queue)

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        key1_d = c_map._root_node._items['d'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items['aa'].key()
        key2_ad = c_map2._root_node._items['ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key2_aa, key2_ad], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual([key2_aa, key2_ad], diff._old_queue)
        self.assertEqual([key1_a, key1_c, key1_d], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items['2'].key()
        key1_4 = c_map._root_node._items['4'].key()
        key1_C = c_map._root_node._items['C'].key()
        key1_F = c_map._root_node._items['F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items['F0'].key()
        key2_F3 = c_map2._root_node._items['F3'].key()
        key2_F4 = c_map2._root_node._items['F4'].key()
        key2_FD = c_map2._root_node._items['FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items['aa'].key()
        key1_ad = c_map._root_node._items['ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items['a'].key()
        key2_b = c_map2._root_node._items['b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        # use. The reason is that 'a' was matched exactly to 'ad' (by sha
        # sum). However, the code gets complicated in the case of more
        # than one interesting key, so for now, we live with this.
        # Consider revising, though benchmarking showing it to be a
        # real-world issue should be done first.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        c_map2 = self.get_map({
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([(('acc',), 'initial acc content'),
                          (('ace',), 'initial ace content'),
                         ], diff._new_item_queue)

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items['c'].key()
        key2_d = c_map._root_node._items['d'].key()
        c_map.map(('ccc',), 'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue 1 time
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into interesting queue, and
        # then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('ccc',), 'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        c_map.map(('add',), 'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_aa = c_map._root_node._items['a']._items['aa'].key()
        key1_ab = c_map._root_node._items['a']._items['ab'].key()
        key1_ac = c_map._root_node._items['a']._items['ac'].key()
        key1_ad = c_map._root_node._items['a']._items['ad'].key()
        c_map.map(('aaa',), 'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_aa = c_map._root_node._items['a']._items['aa'].key()
        c_map.map(('acc',), 'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_ac = c_map._root_node._items['a']._items['ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))

    def test_empty_to_one_keys(self):
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({('a',): 'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({('a',): 'content',
                                  ('b',): 'content',
                                  ('c',): 'content',
                                  })
        target = self.get_map_key({('a',): 'content',
                                   ('b',): 'other content',
                                   ('c',): 'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items['b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [(('b',), 'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({('aaa',): 'common',
                                  ('c',): 'common',
                                  })
        target = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'new',
                                   ('c',): 'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items['a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items['a']._items['aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [(('aab',), 'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({('aaa',): 'common'})
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bbb',): 'new',
                                    })
        target3 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'other',
                                    ('bbb',): 'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items['b'].key()
        a_key = target3_map._root_node._items['a'].key()
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [(('bbb',), 'new'), (('aac',), 'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({('aaa',): 'common',
                                   })
        basis2 = self.get_map_key({('bbb',): 'common',
                                   })
        target1 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'target1',
                                    ('bbb',): 'common',
                                    })
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bba',): 'target2',
                                    ('bbb',): 'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items['a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items['a']._items['aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items['b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [(('aac',), 'target1'), (('bba',), 'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots*, and also via the old keys iteration,
        # it is still scanned for old refs and items, because it is not
        # truly new. This requires 2 levels of InternalNodes to expose,
        # because of the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefit from
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable inside
        #    that InternalNode
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            ('aaa',): 'left',
            ('abb',): 'right',
            # Forces an internalNode at 'a'
            ('ccc',): 'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            ('aaa',): 'left',
            ('abb',): 'right',
            ('ccc',): 'common',
            # And a new top level node so the root key is different
            ('ddd',): 'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            ('abb',): 'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items['d'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.

        self.assertIterInteresting(
            [right, left, l_d_key],
            [(('ddd',), 'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node
        basis = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        left = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'changed left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        right = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'changed right',
            }, maximum_size=60)
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items['a'].key()
        l_c_key = left_map._root_node._items['c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items['a'].key()
        r_c_key = right_map._root_node._items['c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [(('abb',), 'changed left'), (('cbb',), 'changed right')],
            [left, right], [basis])