1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tests for maps built on a CHK versionedfiles facility."""
19
from itertools import izip
26
from bzrlib.chk_map import (
34
class TestNode(tests.TestCase):
36
def assertCommonPrefix(self, expected_common, prefix, key):
37
common = Node.common_prefix(prefix, key)
38
self.assertTrue(len(common) <= len(prefix))
39
self.assertTrue(len(common) <= len(key))
40
self.assertStartsWith(prefix, common)
41
self.assertStartsWith(key, common)
42
self.assertEquals(expected_common, common)
44
def test_common_prefix(self):
45
self.assertCommonPrefix('beg', 'beg', 'begin')
47
def test_no_common_prefix(self):
48
self.assertCommonPrefix('', 'begin', 'end')
51
self.assertCommonPrefix('begin', 'begin', 'begin')
53
def test_not_a_prefix(self):
54
self.assertCommonPrefix('b', 'begin', 'b')
57
self.assertCommonPrefix('', '', 'end')
58
self.assertCommonPrefix('', 'begin', '')
59
self.assertCommonPrefix('', '', '')
62
class TestCaseWithStore(tests.TestCaseWithTransport):
64
def get_chk_bytes(self):
65
# The easiest way to get a CHK store is a development6 repository and
66
# then work with the chk_bytes attribute directly.
67
repo = self.make_repository(".", format="development6-rich-root")
69
self.addCleanup(repo.unlock)
70
repo.start_write_group()
71
self.addCleanup(repo.abort_write_group)
74
def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
75
search_key_func=None):
77
chk_bytes = self.get_chk_bytes()
78
root_key = CHKMap.from_dict(chk_bytes, a_dict,
79
maximum_size=maximum_size, key_width=key_width,
80
search_key_func=search_key_func)
81
chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
84
def read_bytes(self, chk_bytes, key):
85
stream = chk_bytes.get_record_stream([key], 'unordered', True)
86
record = stream.next()
87
if record.storage_kind == 'absent':
88
self.fail('Store does not contain the key %s' % (key,))
89
return record.get_bytes_as("fulltext")
91
def to_dict(self, node, *args):
92
return dict(node.iteritems(*args))
95
class TestMap(TestCaseWithStore):
97
def assertHasABMap(self, chk_bytes):
98
ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
99
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
100
self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
101
root_key = ('sha1:' + ab_sha1,)
102
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
105
def assertHasEmptyMap(self, chk_bytes):
106
empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
107
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
108
self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
109
root_key = ('sha1:' + empty_sha1,)
110
self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
113
def assertMapLayoutEqual(self, map_one, map_two):
114
"""Assert that the internal structure is identical between the maps."""
115
map_one._ensure_root()
116
node_one_stack = [map_one._root_node]
117
map_two._ensure_root()
118
node_two_stack = [map_two._root_node]
119
while node_one_stack:
120
node_one = node_one_stack.pop()
121
node_two = node_two_stack.pop()
122
if node_one.__class__ != node_two.__class__:
123
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
124
map_two._dump_tree(include_keys=True))
125
self.assertEqual(node_one._search_prefix,
126
node_two._search_prefix)
127
if isinstance(node_one, InternalNode):
128
# Internal nodes must have identical references
129
self.assertEqual(sorted(node_one._items.keys()),
130
sorted(node_two._items.keys()))
131
node_one_stack.extend([n for n, _ in
132
node_one._iter_nodes(map_one._store)])
133
node_two_stack.extend([n for n, _ in
134
node_two._iter_nodes(map_two._store)])
136
# Leaf nodes must have identical contents
137
self.assertEqual(node_one._items, node_two._items)
138
self.assertEquals([], node_two_stack)
140
def assertCanonicalForm(self, chkmap):
141
"""Assert that the chkmap is in 'canonical' form.
143
We do this by adding all of the key value pairs from scratch, both in
144
forward order and reverse order, and assert that the final tree layout
147
items = list(chkmap.iteritems())
148
map_forward = chk_map.CHKMap(None, None)
149
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
150
for key, value in items:
151
map_forward.map(key, value)
152
self.assertMapLayoutEqual(map_forward, chkmap)
153
map_reverse = chk_map.CHKMap(None, None)
154
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
155
for key, value in reversed(items):
156
map_reverse.map(key, value)
157
self.assertMapLayoutEqual(map_reverse, chkmap)
159
def test_assert_map_layout_equal(self):
160
store = self.get_chk_bytes()
161
map_one = CHKMap(store, None)
162
map_one._root_node.set_maximum_size(20)
163
map_two = CHKMap(store, None)
164
map_two._root_node.set_maximum_size(20)
165
self.assertMapLayoutEqual(map_one, map_two)
166
map_one.map('aaa', 'value')
167
self.assertRaises(AssertionError,
168
self.assertMapLayoutEqual, map_one, map_two)
169
map_two.map('aaa', 'value')
170
self.assertMapLayoutEqual(map_one, map_two)
171
# Split the tree, so we ensure that internal nodes and leaf nodes are
173
map_one.map('aab', 'value')
174
self.assertIsInstance(map_one._root_node, InternalNode)
175
self.assertRaises(AssertionError,
176
self.assertMapLayoutEqual, map_one, map_two)
177
map_two.map('aab', 'value')
178
self.assertMapLayoutEqual(map_one, map_two)
179
map_one.map('aac', 'value')
180
self.assertRaises(AssertionError,
181
self.assertMapLayoutEqual, map_one, map_two)
182
self.assertCanonicalForm(map_one)
184
def test_from_dict_empty(self):
185
chk_bytes = self.get_chk_bytes()
186
root_key = CHKMap.from_dict(chk_bytes, {})
187
# Check the data was saved and inserted correctly.
188
expected_root_key = self.assertHasEmptyMap(chk_bytes)
189
self.assertEqual(expected_root_key, root_key)
191
def test_from_dict_ab(self):
192
chk_bytes = self.get_chk_bytes()
193
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
194
# Check the data was saved and inserted correctly.
195
expected_root_key = self.assertHasABMap(chk_bytes)
196
self.assertEqual(expected_root_key, root_key)
198
def test_apply_empty_ab(self):
199
# applying a delta (None, "a", "b") to an empty chkmap generates the
200
# same map as from_dict_ab.
201
chk_bytes = self.get_chk_bytes()
202
root_key = CHKMap.from_dict(chk_bytes, {})
203
chkmap = CHKMap(chk_bytes, root_key)
204
new_root = chkmap.apply_delta([(None, "a", "b")])
205
# Check the data was saved and inserted correctly.
206
expected_root_key = self.assertHasABMap(chk_bytes)
207
self.assertEqual(expected_root_key, new_root)
208
# The update should have left us with an in memory root node, with an
210
self.assertEqual(new_root, chkmap._root_node._key)
212
def test_apply_ab_empty(self):
213
# applying a delta ("a", None, None) to a map with 'a' in it generates
215
chk_bytes = self.get_chk_bytes()
216
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
217
chkmap = CHKMap(chk_bytes, root_key)
218
new_root = chkmap.apply_delta([(("a",), None, None)])
219
# Check the data was saved and inserted correctly.
220
expected_root_key = self.assertHasEmptyMap(chk_bytes)
221
self.assertEqual(expected_root_key, new_root)
222
# The update should have left us with an in memory root node, with an
224
self.assertEqual(new_root, chkmap._root_node._key)
226
def test_apply_delta_is_deterministic(self):
227
chk_bytes = self.get_chk_bytes()
228
chkmap1 = CHKMap(chk_bytes, None)
229
chkmap1._root_node.set_maximum_size(10)
230
chkmap1.apply_delta([(None, ('aaa',), 'common'),
231
(None, ('bba',), 'target2'),
232
(None, ('bbb',), 'common')])
233
root_key1 = chkmap1._save()
234
self.assertCanonicalForm(chkmap1)
236
chkmap2 = CHKMap(chk_bytes, None)
237
chkmap2._root_node.set_maximum_size(10)
238
chkmap2.apply_delta([(None, ('bbb',), 'common'),
239
(None, ('bba',), 'target2'),
240
(None, ('aaa',), 'common')])
241
root_key2 = chkmap2._save()
242
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
243
chkmap2._dump_tree(include_keys=True))
244
self.assertEqual(root_key1, root_key2)
245
self.assertCanonicalForm(chkmap2)
247
def test_stable_splitting(self):
248
store = self.get_chk_bytes()
249
chkmap = CHKMap(store, None)
250
# Should fit 2 keys per LeafNode
251
chkmap._root_node.set_maximum_size(35)
252
chkmap.map(('aaa',), 'v')
253
self.assertEqualDiff("'' LeafNode\n"
256
chkmap.map(('aab',), 'v')
257
self.assertEqualDiff("'' LeafNode\n"
261
self.assertCanonicalForm(chkmap)
263
# Creates a new internal node, and splits the others into leaves
264
chkmap.map(('aac',), 'v')
265
self.assertEqualDiff("'' InternalNode\n"
273
self.assertCanonicalForm(chkmap)
275
# Splits again, because it can't fit in the current structure
276
chkmap.map(('bbb',), 'v')
277
self.assertEqualDiff("'' InternalNode\n"
278
" 'a' InternalNode\n"
288
self.assertCanonicalForm(chkmap)
290
def test_map_splits_with_longer_key(self):
291
store = self.get_chk_bytes()
292
chkmap = CHKMap(store, None)
293
# Should fit 1 key per LeafNode
294
chkmap._root_node.set_maximum_size(10)
295
chkmap.map(('aaa',), 'v')
296
chkmap.map(('aaaa',), 'v')
297
self.assertCanonicalForm(chkmap)
298
self.assertIsInstance(chkmap._root_node, InternalNode)
300
def test_with_linefeed_in_key(self):
301
store = self.get_chk_bytes()
302
chkmap = CHKMap(store, None)
303
# Should fit 1 key per LeafNode
304
chkmap._root_node.set_maximum_size(10)
305
chkmap.map(('a\ra',), 'val1')
306
chkmap.map(('a\rb',), 'val2')
307
chkmap.map(('ac',), 'val3')
308
self.assertCanonicalForm(chkmap)
309
self.assertEqualDiff("'' InternalNode\n"
310
" 'a\\r' InternalNode\n"
311
" 'a\\ra' LeafNode\n"
312
" ('a\\ra',) 'val1'\n"
313
" 'a\\rb' LeafNode\n"
314
" ('a\\rb',) 'val2'\n"
318
# We should also successfully serialise and deserialise these items
319
root_key = chkmap._save()
320
chkmap = CHKMap(store, root_key)
321
self.assertEqualDiff("'' InternalNode\n"
322
" 'a\\r' InternalNode\n"
323
" 'a\\ra' LeafNode\n"
324
" ('a\\ra',) 'val1'\n"
325
" 'a\\rb' LeafNode\n"
326
" ('a\\rb',) 'val2'\n"
331
def test_deep_splitting(self):
332
store = self.get_chk_bytes()
333
chkmap = CHKMap(store, None)
334
# Should fit 2 keys per LeafNode
335
chkmap._root_node.set_maximum_size(40)
336
chkmap.map(('aaaaaaaa',), 'v')
337
chkmap.map(('aaaaabaa',), 'v')
338
self.assertEqualDiff("'' LeafNode\n"
339
" ('aaaaaaaa',) 'v'\n"
340
" ('aaaaabaa',) 'v'\n",
342
chkmap.map(('aaabaaaa',), 'v')
343
chkmap.map(('aaababaa',), 'v')
344
self.assertEqualDiff("'' InternalNode\n"
346
" ('aaaaaaaa',) 'v'\n"
347
" ('aaaaabaa',) 'v'\n"
349
" ('aaabaaaa',) 'v'\n"
350
" ('aaababaa',) 'v'\n",
352
chkmap.map(('aaabacaa',), 'v')
353
chkmap.map(('aaabadaa',), 'v')
354
self.assertEqualDiff("'' InternalNode\n"
356
" ('aaaaaaaa',) 'v'\n"
357
" ('aaaaabaa',) 'v'\n"
358
" 'aaab' InternalNode\n"
359
" 'aaabaa' LeafNode\n"
360
" ('aaabaaaa',) 'v'\n"
361
" 'aaabab' LeafNode\n"
362
" ('aaababaa',) 'v'\n"
363
" 'aaabac' LeafNode\n"
364
" ('aaabacaa',) 'v'\n"
365
" 'aaabad' LeafNode\n"
366
" ('aaabadaa',) 'v'\n",
368
chkmap.map(('aaababba',), 'val')
369
chkmap.map(('aaababca',), 'val')
370
self.assertEqualDiff("'' InternalNode\n"
372
" ('aaaaaaaa',) 'v'\n"
373
" ('aaaaabaa',) 'v'\n"
374
" 'aaab' InternalNode\n"
375
" 'aaabaa' LeafNode\n"
376
" ('aaabaaaa',) 'v'\n"
377
" 'aaabab' InternalNode\n"
378
" 'aaababa' LeafNode\n"
379
" ('aaababaa',) 'v'\n"
380
" 'aaababb' LeafNode\n"
381
" ('aaababba',) 'val'\n"
382
" 'aaababc' LeafNode\n"
383
" ('aaababca',) 'val'\n"
384
" 'aaabac' LeafNode\n"
385
" ('aaabacaa',) 'v'\n"
386
" 'aaabad' LeafNode\n"
387
" ('aaabadaa',) 'v'\n",
389
# Now we add a node that should fit around an existing InternalNode,
390
# but has a slightly different key prefix, which causes a new
392
chkmap.map(('aaabDaaa',), 'v')
393
self.assertEqualDiff("'' InternalNode\n"
395
" ('aaaaaaaa',) 'v'\n"
396
" ('aaaaabaa',) 'v'\n"
397
" 'aaab' InternalNode\n"
398
" 'aaabD' LeafNode\n"
399
" ('aaabDaaa',) 'v'\n"
400
" 'aaaba' InternalNode\n"
401
" 'aaabaa' LeafNode\n"
402
" ('aaabaaaa',) 'v'\n"
403
" 'aaabab' InternalNode\n"
404
" 'aaababa' LeafNode\n"
405
" ('aaababaa',) 'v'\n"
406
" 'aaababb' LeafNode\n"
407
" ('aaababba',) 'val'\n"
408
" 'aaababc' LeafNode\n"
409
" ('aaababca',) 'val'\n"
410
" 'aaabac' LeafNode\n"
411
" ('aaabacaa',) 'v'\n"
412
" 'aaabad' LeafNode\n"
413
" ('aaabadaa',) 'v'\n",
416
def test_map_collapses_if_size_changes(self):
417
store = self.get_chk_bytes()
418
chkmap = CHKMap(store, None)
419
# Should fit 2 keys per LeafNode
420
chkmap._root_node.set_maximum_size(35)
421
chkmap.map(('aaa',), 'v')
422
chkmap.map(('aab',), 'very long value that splits')
423
self.assertEqualDiff("'' InternalNode\n"
427
" ('aab',) 'very long value that splits'\n",
429
self.assertCanonicalForm(chkmap)
430
# Now changing the value to something small should cause a rebuild
431
chkmap.map(('aab',), 'v')
432
self.assertEqualDiff("'' LeafNode\n"
436
self.assertCanonicalForm(chkmap)
438
def test_map_double_deep_collapses(self):
439
store = self.get_chk_bytes()
440
chkmap = CHKMap(store, None)
441
# Should fit 3 small keys per LeafNode
442
chkmap._root_node.set_maximum_size(40)
443
chkmap.map(('aaa',), 'v')
444
chkmap.map(('aab',), 'very long value that splits')
445
chkmap.map(('abc',), 'v')
446
self.assertEqualDiff("'' InternalNode\n"
447
" 'aa' InternalNode\n"
451
" ('aab',) 'very long value that splits'\n"
455
chkmap.map(('aab',), 'v')
456
self.assertCanonicalForm(chkmap)
457
self.assertEqualDiff("'' LeafNode\n"
463
def test_stable_unmap(self):
464
store = self.get_chk_bytes()
465
chkmap = CHKMap(store, None)
466
# Should fit 2 keys per LeafNode
467
chkmap._root_node.set_maximum_size(35)
468
chkmap.map(('aaa',), 'v')
469
chkmap.map(('aab',), 'v')
470
self.assertEqualDiff("'' LeafNode\n"
474
# Creates a new internal node, and splits the others into leaves
475
chkmap.map(('aac',), 'v')
476
self.assertEqualDiff("'' InternalNode\n"
484
self.assertCanonicalForm(chkmap)
485
# Now lets unmap one of the keys, and assert that we collapse the
487
chkmap.unmap(('aac',))
488
self.assertEqualDiff("'' LeafNode\n"
492
self.assertCanonicalForm(chkmap)
494
def test_unmap_double_deep(self):
495
store = self.get_chk_bytes()
496
chkmap = CHKMap(store, None)
497
# Should fit 3 keys per LeafNode
498
chkmap._root_node.set_maximum_size(40)
499
chkmap.map(('aaa',), 'v')
500
chkmap.map(('aaab',), 'v')
501
chkmap.map(('aab',), 'very long value')
502
chkmap.map(('abc',), 'v')
503
self.assertEqualDiff("'' InternalNode\n"
504
" 'aa' InternalNode\n"
509
" ('aab',) 'very long value'\n"
513
# Removing the 'aab' key should cause everything to collapse back to a
515
chkmap.unmap(('aab',))
516
self.assertEqualDiff("'' LeafNode\n"
522
def test_unmap_double_deep_non_empty_leaf(self):
523
store = self.get_chk_bytes()
524
chkmap = CHKMap(store, None)
525
# Should fit 3 keys per LeafNode
526
chkmap._root_node.set_maximum_size(40)
527
chkmap.map(('aaa',), 'v')
528
chkmap.map(('aab',), 'long value')
529
chkmap.map(('aabb',), 'v')
530
chkmap.map(('abc',), 'v')
531
self.assertEqualDiff("'' InternalNode\n"
532
" 'aa' InternalNode\n"
536
" ('aab',) 'long value'\n"
541
# Removing the 'aab' key should cause everything to collapse back to a
543
chkmap.unmap(('aab',))
544
self.assertEqualDiff("'' LeafNode\n"
550
def test_unmap_with_known_internal_node_doesnt_page(self):
551
store = self.get_chk_bytes()
552
chkmap = CHKMap(store, None)
553
# Should fit 3 keys per LeafNode
554
chkmap._root_node.set_maximum_size(30)
555
chkmap.map(('aaa',), 'v')
556
chkmap.map(('aab',), 'v')
557
chkmap.map(('aac',), 'v')
558
chkmap.map(('abc',), 'v')
559
chkmap.map(('acd',), 'v')
560
self.assertEqualDiff("'' InternalNode\n"
561
" 'aa' InternalNode\n"
573
# Save everything to the map, and start over
574
chkmap = CHKMap(store, chkmap._save())
575
# Mapping an 'aa' key loads the internal node, but should not map the
576
# 'ab' and 'ac' nodes
577
chkmap.map(('aad',), 'v')
578
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
579
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
580
self.assertIsInstance(chkmap._root_node._items['ac'], tuple)
581
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
583
chkmap.unmap(('acd',))
584
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
585
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
587
def test_unmap_without_fitting_doesnt_page_in(self):
588
store = self.get_chk_bytes()
589
chkmap = CHKMap(store, None)
590
# Should fit 2 keys per LeafNode
591
chkmap._root_node.set_maximum_size(20)
592
chkmap.map(('aaa',), 'v')
593
chkmap.map(('aab',), 'v')
594
self.assertEqualDiff("'' InternalNode\n"
600
# Save everything to the map, and start over
601
chkmap = CHKMap(store, chkmap._save())
602
chkmap.map(('aac',), 'v')
603
chkmap.map(('aad',), 'v')
604
chkmap.map(('aae',), 'v')
605
chkmap.map(('aaf',), 'v')
606
# At this point, the previous nodes should not be paged in, but the
607
# newly added nodes would be
608
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
609
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
610
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
611
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
612
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
613
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
614
# Now unmapping one of the new nodes will use only the already-paged-in
615
# nodes to determine that we don't need to do more.
616
chkmap.unmap(('aaf',))
617
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
618
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
619
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
620
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
621
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
623
def test_unmap_pages_in_if_necessary(self):
624
store = self.get_chk_bytes()
625
chkmap = CHKMap(store, None)
626
# Should fit 2 keys per LeafNode
627
chkmap._root_node.set_maximum_size(30)
628
chkmap.map(('aaa',), 'val')
629
chkmap.map(('aab',), 'val')
630
chkmap.map(('aac',), 'val')
631
self.assertEqualDiff("'' InternalNode\n"
639
root_key = chkmap._save()
640
# Save everything to the map, and start over
641
chkmap = CHKMap(store, root_key)
642
chkmap.map(('aad',), 'v')
643
# At this point, the previous nodes should not be paged in, but the
644
# newly added node would be
645
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
646
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
647
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
648
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
649
# Unmapping the new node will check the existing nodes to see if they
651
# Clear the page cache so we ensure we have to read all the children
652
chk_map._page_cache.clear()
653
chkmap.unmap(('aad',))
654
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
655
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
656
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
658
def test_unmap_pages_in_from_page_cache(self):
659
store = self.get_chk_bytes()
660
chkmap = CHKMap(store, None)
661
# Should fit 2 keys per LeafNode
662
chkmap._root_node.set_maximum_size(30)
663
chkmap.map(('aaa',), 'val')
664
chkmap.map(('aab',), 'val')
665
chkmap.map(('aac',), 'val')
666
root_key = chkmap._save()
667
# Save everything to the map, and start over
668
chkmap = CHKMap(store, root_key)
669
chkmap.map(('aad',), 'val')
670
self.assertEqualDiff("'' InternalNode\n"
680
# Save everything to the map, start over after _dump_tree
681
chkmap = CHKMap(store, root_key)
682
chkmap.map(('aad',), 'v')
683
# At this point, the previous nodes should not be paged in, but the
684
# newly added node would be
685
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
686
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
687
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
688
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
689
# Now clear the page cache, and only include 2 of the children in the
691
aab_key = chkmap._root_node._items['aab']
692
aab_bytes = chk_map._page_cache[aab_key]
693
aac_key = chkmap._root_node._items['aac']
694
aac_bytes = chk_map._page_cache[aac_key]
695
chk_map._page_cache.clear()
696
chk_map._page_cache[aab_key] = aab_bytes
697
chk_map._page_cache[aac_key] = aac_bytes
699
# Unmapping the new node will check the nodes from the page cache
700
# first, and not have to read in 'aaa'
701
chkmap.unmap(('aad',))
702
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
703
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
704
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
706
def test_unmap_uses_existing_items(self):
707
store = self.get_chk_bytes()
708
chkmap = CHKMap(store, None)
709
# Should fit 2 keys per LeafNode
710
chkmap._root_node.set_maximum_size(30)
711
chkmap.map(('aaa',), 'val')
712
chkmap.map(('aab',), 'val')
713
chkmap.map(('aac',), 'val')
714
root_key = chkmap._save()
715
# Save everything to the map, and start over
716
chkmap = CHKMap(store, root_key)
717
chkmap.map(('aad',), 'val')
718
chkmap.map(('aae',), 'val')
719
chkmap.map(('aaf',), 'val')
720
# At this point, the previous nodes should not be paged in, but the
721
# newly added node would be
722
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
723
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
724
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
725
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
726
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
727
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
729
# Unmapping a new node will see the other nodes that are already in
730
# memory, and not need to page in anything else
731
chkmap.unmap(('aad',))
732
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
733
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
734
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
735
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
736
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
738
def test_iter_changes_empty_ab(self):
739
# Asking for changes between an empty dict to a dict with keys returns
741
basis = self._get_map({}, maximum_size=10)
742
target = self._get_map(
743
{('a',): 'content here', ('b',): 'more content'},
744
chk_bytes=basis._store, maximum_size=10)
745
self.assertEqual([(('a',), None, 'content here'),
746
(('b',), None, 'more content')],
747
sorted(list(target.iter_changes(basis))))
749
def test_iter_changes_ab_empty(self):
750
# Asking for changes between a dict with keys to an empty dict returns
752
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
754
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
755
self.assertEqual([(('a',), 'content here', None),
756
(('b',), 'more content', None)],
757
sorted(list(target.iter_changes(basis))))
759
def test_iter_changes_empty_empty_is_empty(self):
760
basis = self._get_map({}, maximum_size=10)
761
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
762
self.assertEqual([], sorted(list(target.iter_changes(basis))))
764
def test_iter_changes_ab_ab_is_empty(self):
765
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
767
target = self._get_map(
768
{('a',): 'content here', ('b',): 'more content'},
769
chk_bytes=basis._store, maximum_size=10)
770
self.assertEqual([], sorted(list(target.iter_changes(basis))))
772
def test_iter_changes_ab_ab_nodes_not_loaded(self):
773
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
775
target = self._get_map(
776
{('a',): 'content here', ('b',): 'more content'},
777
chk_bytes=basis._store, maximum_size=10)
778
list(target.iter_changes(basis))
779
self.assertIsInstance(target._root_node, tuple)
780
self.assertIsInstance(basis._root_node, tuple)
782
def test_iter_changes_ab_ab_changed_values_shown(self):
783
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
785
target = self._get_map(
786
{('a',): 'content here', ('b',): 'different content'},
787
chk_bytes=basis._store, maximum_size=10)
788
result = sorted(list(target.iter_changes(basis)))
789
self.assertEqual([(('b',), 'more content', 'different content')],
792
def test_iter_changes_mixed_node_length(self):
793
# When one side has different node lengths than the other, common
794
# but different keys still need to be show, and new-and-old included
796
# aaa - common unaltered
797
# aab - common altered
801
# aaa to be not loaded (later test)
802
# aab, b, at to be returned.
803
# basis splits at byte 0,1,2, aaa is commonb is basis only
804
basis_dict = {('aaa',): 'foo bar',
805
('aab',): 'common altered a', ('b',): 'foo bar b'}
806
# target splits at byte 1,2, at is target only
807
target_dict = {('aaa',): 'foo bar',
808
('aab',): 'common altered b', ('at',): 'foo bar t'}
810
(('aab',), 'common altered a', 'common altered b'),
811
(('at',), None, 'foo bar t'),
812
(('b',), 'foo bar b', None),
814
basis = self._get_map(basis_dict, maximum_size=10)
815
target = self._get_map(target_dict, maximum_size=10,
816
chk_bytes=basis._store)
817
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
819
def test_iter_changes_common_pages_not_loaded(self):
820
# aaa - common unaltered
821
# aab - common altered
825
# aaa to be not loaded
826
# aaa not to be in result.
827
basis_dict = {('aaa',): 'foo bar',
828
('aab',): 'common altered a', ('b',): 'foo bar b'}
829
# target splits at byte 1, at is target only
830
target_dict = {('aaa',): 'foo bar',
831
('aab',): 'common altered b', ('at',): 'foo bar t'}
832
basis = self._get_map(basis_dict, maximum_size=10)
833
target = self._get_map(target_dict, maximum_size=10,
834
chk_bytes=basis._store)
835
basis_get = basis._store.get_record_stream
836
def get_record_stream(keys, order, fulltext):
837
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
838
self.fail("'aaa' pointer was followed %r" % keys)
839
return basis_get(keys, order, fulltext)
840
basis._store.get_record_stream = get_record_stream
841
result = sorted(list(target.iter_changes(basis)))
842
for change in result:
843
if change[0] == ('aaa',):
844
self.fail("Found unexpected change: %s" % change)
846
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
847
# Within a leaf there are no hash's to exclude keys, make sure multi
848
# value leaf nodes are handled well.
849
basis_dict = {('aaa',): 'foo bar',
850
('aab',): 'common altered a', ('b',): 'foo bar b'}
851
target_dict = {('aaa',): 'foo bar',
852
('aab',): 'common altered b', ('at',): 'foo bar t'}
854
(('aab',), 'common altered a', 'common altered b'),
855
(('at',), None, 'foo bar t'),
856
(('b',), 'foo bar b', None),
858
basis = self._get_map(basis_dict)
859
target = self._get_map(target_dict, chk_bytes=basis._store)
860
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
862
def test_iteritems_empty(self):
863
chk_bytes = self.get_chk_bytes()
864
root_key = CHKMap.from_dict(chk_bytes, {})
865
chkmap = CHKMap(chk_bytes, root_key)
866
self.assertEqual([], list(chkmap.iteritems()))
868
def test_iteritems_two_items(self):
869
chk_bytes = self.get_chk_bytes()
870
root_key = CHKMap.from_dict(chk_bytes,
871
{"a":"content here", "b":"more content"})
872
chkmap = CHKMap(chk_bytes, root_key)
873
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
874
sorted(list(chkmap.iteritems())))
876
def test_iteritems_selected_one_of_two_items(self):
877
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
878
self.assertEqual({("a",): "content here"},
879
self.to_dict(chkmap, [("a",)]))
881
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
882
chkmap = self._get_map(
883
{("a","a"):"content here", ("a", "b",):"more content",
884
("b", ""): 'boring content'},
885
maximum_size=10, key_width=2)
887
{("a", "a"): "content here", ("a", "b"): 'more content'},
888
self.to_dict(chkmap, [("a",)]))
890
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
891
search_key_func = chk_map.search_key_registry.get('hash-16-way')
892
self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
893
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
894
self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
895
chkmap = self._get_map(
896
{("a","a"):"content here", ("a", "b",):"more content",
897
("b", ""): 'boring content'},
898
maximum_size=10, key_width=2, search_key_func=search_key_func)
900
{("a", "a"): "content here", ("a", "b"): 'more content'},
901
self.to_dict(chkmap, [("a",)]))
903
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
904
chkmap = self._get_map(
905
{("a","a"):"content here", ("a", "b",):"more content",
906
("b", ""): 'boring content'}, key_width=2)
908
{("a", "a"): "content here", ("a", "b"): 'more content'},
909
self.to_dict(chkmap, [("a",)]))
911
def test___len__empty(self):
912
chkmap = self._get_map({})
913
self.assertEqual(0, len(chkmap))
915
def test___len__2(self):
916
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
917
self.assertEqual(2, len(chkmap))
919
def test_max_size_100_bytes_new(self):
920
# When there is a 100 byte upper node limit, a tree is formed.
921
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
922
# We expect three nodes:
923
# A root, with two children, and with two key prefixes - k1 to one, and
924
# k2 to the other as our node splitting is only just being developed.
925
# The maximum size should be embedded
926
chkmap._ensure_root()
927
self.assertEqual(100, chkmap._root_node.maximum_size)
928
self.assertEqual(1, chkmap._root_node._key_width)
929
# There should be two child nodes, and prefix of 2(bytes):
930
self.assertEqual(2, len(chkmap._root_node._items))
931
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
932
# The actual nodes pointed at will change as serialisers change; so
933
# here we test that the key prefix is correct; then load the nodes and
934
# check they have the right pointed at key; whether they have the
935
# pointed at value inline or not is also unrelated to this test so we
936
# don't check that in detail - rather we just check the aggregate
938
nodes = sorted(chkmap._root_node._items.items())
941
self.assertEqual('k1', ptr1[0])
942
self.assertEqual('k2', ptr2[0])
943
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
944
self.assertIsInstance(node1, LeafNode)
945
self.assertEqual(1, len(node1))
946
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
947
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
948
self.assertIsInstance(node2, LeafNode)
949
self.assertEqual(1, len(node2))
950
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
951
# Having checked we have a good structure, check that the content is
953
self.assertEqual(2, len(chkmap))
954
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
955
self.to_dict(chkmap))
957
def test_init_root_is_LeafNode_new(self):
958
chk_bytes = self.get_chk_bytes()
959
chkmap = CHKMap(chk_bytes, None)
960
self.assertIsInstance(chkmap._root_node, LeafNode)
961
self.assertEqual({}, self.to_dict(chkmap))
962
self.assertEqual(0, len(chkmap))
964
def test_init_and_save_new(self):
965
chk_bytes = self.get_chk_bytes()
966
chkmap = CHKMap(chk_bytes, None)
968
leaf_node = LeafNode()
969
self.assertEqual([key], leaf_node.serialise(chk_bytes))
971
def test_map_first_item_new(self):
972
chk_bytes = self.get_chk_bytes()
973
chkmap = CHKMap(chk_bytes, None)
974
chkmap.map(("foo,",), "bar")
975
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
976
self.assertEqual(1, len(chkmap))
978
leaf_node = LeafNode()
979
leaf_node.map(chk_bytes, ("foo,",), "bar")
980
self.assertEqual([key], leaf_node.serialise(chk_bytes))
982
def test_unmap_last_item_root_is_leaf_new(self):
983
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
984
chkmap.unmap(("k1"*50,))
985
chkmap.unmap(("k2"*50,))
986
self.assertEqual(0, len(chkmap))
987
self.assertEqual({}, self.to_dict(chkmap))
989
leaf_node = LeafNode()
990
self.assertEqual([key], leaf_node.serialise(chkmap._store))
992
def test__dump_tree(self):
993
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
994
("bbb",): "value3",},
996
self.assertEqualDiff("'' InternalNode\n"
997
" 'a' InternalNode\n"
999
" ('aaa',) 'value1'\n"
1001
" ('aab',) 'value2'\n"
1003
" ('bbb',) 'value3'\n",
1004
chkmap._dump_tree())
1005
self.assertEqualDiff("'' InternalNode\n"
1006
" 'a' InternalNode\n"
1008
" ('aaa',) 'value1'\n"
1010
" ('aab',) 'value2'\n"
1012
" ('bbb',) 'value3'\n",
1013
chkmap._dump_tree())
1014
self.assertEqualDiff(
1015
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1016
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1017
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1018
" ('aaa',) 'value1'\n"
1019
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1020
" ('aab',) 'value2'\n"
1021
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1022
" ('bbb',) 'value3'\n",
1023
chkmap._dump_tree(include_keys=True))
1025
def test__dump_tree_in_progress(self):
1026
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
1028
chkmap.map(('bbb',), 'value3')
1029
self.assertEqualDiff("'' InternalNode\n"
1030
" 'a' InternalNode\n"
1032
" ('aaa',) 'value1'\n"
1034
" ('aab',) 'value2'\n"
1036
" ('bbb',) 'value3'\n",
1037
chkmap._dump_tree())
1038
# For things that are updated by adding 'bbb', we don't have a sha key
1039
# for them yet, so they are listed as None
1040
self.assertEqualDiff(
1041
"'' InternalNode None\n"
1042
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1043
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1044
" ('aaa',) 'value1'\n"
1045
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1046
" ('aab',) 'value2'\n"
1047
" 'b' LeafNode None\n"
1048
" ('bbb',) 'value3'\n",
1049
chkmap._dump_tree(include_keys=True))
1052
def _search_key_single(key):
1053
"""A search key function that maps all nodes to the same value"""
1056
def _test_search_key(key):
1057
return 'test:' + '\x00'.join(key)
1060
class TestMapSearchKeys(TestCaseWithStore):
1062
def test_default_chk_map_uses_flat_search_key(self):
1063
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1064
self.assertEqual('1',
1065
chkmap._search_key_func(('1',)))
1066
self.assertEqual('1\x002',
1067
chkmap._search_key_func(('1', '2')))
1068
self.assertEqual('1\x002\x003',
1069
chkmap._search_key_func(('1', '2', '3')))
1071
def test_search_key_is_passed_to_root_node(self):
1072
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1073
search_key_func=_test_search_key)
1074
self.assertIs(_test_search_key, chkmap._search_key_func)
1075
self.assertEqual('test:1\x002\x003',
1076
chkmap._search_key_func(('1', '2', '3')))
1077
self.assertEqual('test:1\x002\x003',
1078
chkmap._root_node._search_key(('1', '2', '3')))
1080
def test_search_key_passed_via__ensure_root(self):
1081
chk_bytes = self.get_chk_bytes()
1082
chkmap = chk_map.CHKMap(chk_bytes, None,
1083
search_key_func=_test_search_key)
1084
root_key = chkmap._save()
1085
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1086
search_key_func=_test_search_key)
1087
chkmap._ensure_root()
1088
self.assertEqual('test:1\x002\x003',
1089
chkmap._root_node._search_key(('1', '2', '3')))
1091
def test_search_key_with_internal_node(self):
1092
chk_bytes = self.get_chk_bytes()
1093
chkmap = chk_map.CHKMap(chk_bytes, None,
1094
search_key_func=_test_search_key)
1095
chkmap._root_node.set_maximum_size(10)
1096
chkmap.map(('1',), 'foo')
1097
chkmap.map(('2',), 'bar')
1098
chkmap.map(('3',), 'baz')
1099
self.assertEqualDiff("'' InternalNode\n"
1100
" 'test:1' LeafNode\n"
1102
" 'test:2' LeafNode\n"
1104
" 'test:3' LeafNode\n"
1106
, chkmap._dump_tree())
1107
root_key = chkmap._save()
1108
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1109
search_key_func=_test_search_key)
1110
self.assertEqualDiff("'' InternalNode\n"
1111
" 'test:1' LeafNode\n"
1113
" 'test:2' LeafNode\n"
1115
" 'test:3' LeafNode\n"
1117
, chkmap._dump_tree())
1119
def test_search_key_16(self):
1120
chk_bytes = self.get_chk_bytes()
1121
chkmap = chk_map.CHKMap(chk_bytes, None,
1122
search_key_func=chk_map._search_key_16)
1123
chkmap._root_node.set_maximum_size(10)
1124
chkmap.map(('1',), 'foo')
1125
chkmap.map(('2',), 'bar')
1126
chkmap.map(('3',), 'baz')
1127
self.assertEqualDiff("'' InternalNode\n"
1134
, chkmap._dump_tree())
1135
root_key = chkmap._save()
1136
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1137
search_key_func=chk_map._search_key_16)
1138
# We can get the values back correctly
1139
self.assertEqual([(('1',), 'foo')],
1140
list(chkmap.iteritems([('1',)])))
1141
self.assertEqualDiff("'' InternalNode\n"
1148
, chkmap._dump_tree())
1150
def test_search_key_255(self):
1151
chk_bytes = self.get_chk_bytes()
1152
chkmap = chk_map.CHKMap(chk_bytes, None,
1153
search_key_func=chk_map._search_key_255)
1154
chkmap._root_node.set_maximum_size(10)
1155
chkmap.map(('1',), 'foo')
1156
chkmap.map(('2',), 'bar')
1157
chkmap.map(('3',), 'baz')
1158
self.assertEqualDiff("'' InternalNode\n"
1159
" '\\x1a' LeafNode\n"
1163
" '\\x83' LeafNode\n"
1165
, chkmap._dump_tree())
1166
root_key = chkmap._save()
1167
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1168
search_key_func=chk_map._search_key_255)
1169
# We can get the values back correctly
1170
self.assertEqual([(('1',), 'foo')],
1171
list(chkmap.iteritems([('1',)])))
1172
self.assertEqualDiff("'' InternalNode\n"
1173
" '\\x1a' LeafNode\n"
1177
" '\\x83' LeafNode\n"
1179
, chkmap._dump_tree())
1181
def test_search_key_collisions(self):
1182
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1183
search_key_func=_search_key_single)
1184
# The node will want to expand, but it cannot, because it knows that
1185
# all the keys must map to this node
1186
chkmap._root_node.set_maximum_size(20)
1187
chkmap.map(('1',), 'foo')
1188
chkmap.map(('2',), 'bar')
1189
chkmap.map(('3',), 'baz')
1190
self.assertEqualDiff("'' LeafNode\n"
1194
, chkmap._dump_tree())
1197
class TestSearchKeyFuncs(tests.TestCase):
1199
def assertSearchKey16(self, expected, key):
1200
self.assertEqual(expected, chk_map._search_key_16(key))
1202
def assertSearchKey255(self, expected, key):
1203
actual = chk_map._search_key_255(key)
1204
self.assertEqual(expected, actual, 'actual: %r' % (actual,))
1206
def test_simple_16(self):
1207
self.assertSearchKey16('8C736521', ('foo',))
1208
self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
1209
self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
1210
self.assertSearchKey16('ED82CD11', ('abcd',))
1212
def test_simple_255(self):
1213
self.assertSearchKey255('\x8cse!', ('foo',))
1214
self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
1215
self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
1216
# The standard mapping for these would include '\n', so it should be
1218
self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))
1220
def test_255_does_not_include_newline(self):
1221
# When mapping via _search_key_255, we should never have the '\n'
1222
# character, but all other 255 values should be present
1224
for char_in in range(256):
1225
search_key = chk_map._search_key_255((chr(char_in),))
1226
chars_used.update(search_key)
1227
all_chars = set([chr(x) for x in range(256)])
1228
unused_chars = all_chars.symmetric_difference(chars_used)
1229
self.assertEqual(set('\n'), unused_chars)
1232
class TestLeafNode(TestCaseWithStore):
1234
def test_current_size_empty(self):
1236
self.assertEqual(16, node._current_size())
1238
def test_current_size_size_changed(self):
1240
node.set_maximum_size(10)
1241
self.assertEqual(17, node._current_size())
1243
def test_current_size_width_changed(self):
1245
node._key_width = 10
1246
self.assertEqual(17, node._current_size())
1248
def test_current_size_items(self):
1250
base_size = node._current_size()
1251
node.map(None, ("foo bar",), "baz")
1252
self.assertEqual(base_size + 14, node._current_size())
1254
def test_deserialise_empty(self):
1255
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1256
self.assertEqual(0, len(node))
1257
self.assertEqual(10, node.maximum_size)
1258
self.assertEqual(("sha1:1234",), node.key())
1259
self.assertIs(None, node._search_prefix)
1260
self.assertIs(None, node._common_serialised_prefix)
1262
def test_deserialise_items(self):
1263
node = LeafNode.deserialise(
1264
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1266
self.assertEqual(2, len(node))
1267
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1268
sorted(node.iteritems(None)))
1270
def test_deserialise_item_with_null_width_1(self):
1271
node = LeafNode.deserialise(
1272
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1274
self.assertEqual(2, len(node))
1275
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1276
sorted(node.iteritems(None)))
1278
def test_deserialise_item_with_null_width_2(self):
1279
node = LeafNode.deserialise(
1280
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1281
"quux\x00\x001\nblarh\n",
1283
self.assertEqual(2, len(node))
1284
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1285
sorted(node.iteritems(None)))
1287
def test_iteritems_selected_one_of_two_items(self):
1288
node = LeafNode.deserialise(
1289
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1291
self.assertEqual(2, len(node))
1292
self.assertEqual([(("quux",), "blarh")],
1293
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1295
def test_deserialise_item_with_common_prefix(self):
1296
node = LeafNode.deserialise(
1297
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1299
self.assertEqual(2, len(node))
1300
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1301
sorted(node.iteritems(None)))
1302
self.assertIs(chk_map._unknown, node._search_prefix)
1303
self.assertEqual('foo\x00', node._common_serialised_prefix)
1305
def test_deserialise_multi_line(self):
1306
node = LeafNode.deserialise(
1307
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1309
self.assertEqual(2, len(node))
1310
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1311
(("foo", "2"), "blarh\n"),
1312
], sorted(node.iteritems(None)))
1313
self.assertIs(chk_map._unknown, node._search_prefix)
1314
self.assertEqual('foo\x00', node._common_serialised_prefix)
1316
def test_key_new(self):
1318
self.assertEqual(None, node.key())
1320
def test_key_after_map(self):
1321
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1322
node.map(None, ("foo bar",), "baz quux")
1323
self.assertEqual(None, node.key())
1325
def test_key_after_unmap(self):
1326
node = LeafNode.deserialise(
1327
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1329
node.unmap(None, ("foo bar",))
1330
self.assertEqual(None, node.key())
1332
def test_map_exceeding_max_size_only_entry_new(self):
1334
node.set_maximum_size(10)
1335
result = node.map(None, ("foo bar",), "baz quux")
1336
self.assertEqual(("foo bar", [("", node)]), result)
1337
self.assertTrue(10 < node._current_size())
1339
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
1341
node.set_maximum_size(10)
1342
node.map(None, ("foo bar",), "baz quux")
1343
prefix, result = list(node.map(None, ("blue",), "red"))
1344
self.assertEqual("", prefix)
1345
self.assertEqual(2, len(result))
1346
split_chars = set([result[0][0], result[1][0]])
1347
self.assertEqual(set(["f", "b"]), split_chars)
1348
nodes = dict(result)
1350
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1351
self.assertEqual(10, node.maximum_size)
1352
self.assertEqual(1, node._key_width)
1354
self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1355
self.assertEqual(10, node.maximum_size)
1356
self.assertEqual(1, node._key_width)
1358
def test_map_first(self):
1360
result = node.map(None, ("foo bar",), "baz quux")
1361
self.assertEqual(("foo bar", [("", node)]), result)
1362
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1363
self.assertEqual(1, len(node))
1365
def test_map_second(self):
1367
node.map(None, ("foo bar",), "baz quux")
1368
result = node.map(None, ("bingo",), "bango")
1369
self.assertEqual(("", [("", node)]), result)
1370
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1371
self.to_dict(node, None))
1372
self.assertEqual(2, len(node))
1374
def test_map_replacement(self):
1376
node.map(None, ("foo bar",), "baz quux")
1377
result = node.map(None, ("foo bar",), "bango")
1378
self.assertEqual(("foo bar", [("", node)]), result)
1379
self.assertEqual({("foo bar",): "bango"},
1380
self.to_dict(node, None))
1381
self.assertEqual(1, len(node))
1383
def test_serialise_empty(self):
1384
store = self.get_chk_bytes()
1386
node.set_maximum_size(10)
1387
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1388
self.assertEqual([expected_key],
1389
list(node.serialise(store)))
1390
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1391
self.assertEqual(expected_key, node.key())
1393
def test_serialise_items(self):
1394
store = self.get_chk_bytes()
1396
node.set_maximum_size(10)
1397
node.map(None, ("foo bar",), "baz quux")
1398
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1399
self.assertEqual('foo bar', node._common_serialised_prefix)
1400
self.assertEqual([expected_key],
1401
list(node.serialise(store)))
1402
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1403
self.read_bytes(store, expected_key))
1404
self.assertEqual(expected_key, node.key())
1406
def test_unique_serialised_prefix_empty_new(self):
1408
self.assertIs(None, node._compute_search_prefix())
1410
def test_unique_serialised_prefix_one_item_new(self):
1412
node.map(None, ("foo bar", "baz"), "baz quux")
1413
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1415
def test_unmap_missing(self):
1417
self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1419
def test_unmap_present(self):
1421
node.map(None, ("foo bar",), "baz quux")
1422
result = node.unmap(None, ("foo bar",))
1423
self.assertEqual(node, result)
1424
self.assertEqual({}, self.to_dict(node, None))
1425
self.assertEqual(0, len(node))
1427
def test_map_maintains_common_prefixes(self):
1430
node.map(None, ("foo bar", "baz"), "baz quux")
1431
self.assertEqual('foo bar\x00baz', node._search_prefix)
1432
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1433
node.map(None, ("foo bar", "bing"), "baz quux")
1434
self.assertEqual('foo bar\x00b', node._search_prefix)
1435
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1436
node.map(None, ("fool", "baby"), "baz quux")
1437
self.assertEqual('foo', node._search_prefix)
1438
self.assertEqual('foo', node._common_serialised_prefix)
1439
node.map(None, ("foo bar", "baz"), "replaced")
1440
self.assertEqual('foo', node._search_prefix)
1441
self.assertEqual('foo', node._common_serialised_prefix)
1442
node.map(None, ("very", "different"), "value")
1443
self.assertEqual('', node._search_prefix)
1444
self.assertEqual('', node._common_serialised_prefix)
1446
def test_unmap_maintains_common_prefixes(self):
1449
node.map(None, ("foo bar", "baz"), "baz quux")
1450
node.map(None, ("foo bar", "bing"), "baz quux")
1451
node.map(None, ("fool", "baby"), "baz quux")
1452
node.map(None, ("very", "different"), "value")
1453
self.assertEqual('', node._search_prefix)
1454
self.assertEqual('', node._common_serialised_prefix)
1455
node.unmap(None, ("very", "different"))
1456
self.assertEqual("foo", node._search_prefix)
1457
self.assertEqual("foo", node._common_serialised_prefix)
1458
node.unmap(None, ("fool", "baby"))
1459
self.assertEqual('foo bar\x00b', node._search_prefix)
1460
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1461
node.unmap(None, ("foo bar", "baz"))
1462
self.assertEqual('foo bar\x00bing', node._search_prefix)
1463
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1464
node.unmap(None, ("foo bar", "bing"))
1465
self.assertEqual(None, node._search_prefix)
1466
self.assertEqual(None, node._common_serialised_prefix)
1469
class TestInternalNode(TestCaseWithStore):
1471
def test_add_node_empty_new(self):
1472
node = InternalNode('fo')
1474
child.set_maximum_size(100)
1475
child.map(None, ("foo",), "bar")
1476
node.add_node("foo", child)
1477
# Note that node isn't strictly valid now as a tree (only one child),
1478
# but thats ok for this test.
1479
# The first child defines the node's width:
1480
self.assertEqual(3, node._node_width)
1481
# We should be able to iterate over the contents without doing IO.
1482
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
1483
# The length should be known:
1484
self.assertEqual(1, len(node))
1485
# serialising the node should serialise the child and the node.
1486
chk_bytes = self.get_chk_bytes()
1487
keys = list(node.serialise(chk_bytes))
1488
child_key = child.serialise(chk_bytes)[0]
1490
[child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
1492
# We should be able to access deserialised content.
1493
bytes = self.read_bytes(chk_bytes, keys[1])
1494
node = chk_map._deserialise(bytes, keys[1], None)
1495
self.assertEqual(1, len(node))
1496
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
1497
self.assertEqual(3, node._node_width)
1499
def test_add_node_resets_key_new(self):
1500
node = InternalNode('fo')
1502
child.set_maximum_size(100)
1503
child.map(None, ("foo",), "bar")
1504
node.add_node("foo", child)
1505
chk_bytes = self.get_chk_bytes()
1506
keys = list(node.serialise(chk_bytes))
1507
self.assertEqual(keys[1], node._key)
1508
node.add_node("fos", child)
1509
self.assertEqual(None, node._key)
1511
# def test_add_node_empty_oversized_one_ok_new(self):
1512
# def test_add_node_one_oversized_second_kept_minimum_fan(self):
1513
# def test_add_node_two_oversized_third_kept_minimum_fan(self):
1514
# def test_add_node_one_oversized_second_splits_errors(self):
1516
def test__iter_nodes_no_key_filter(self):
1517
node = InternalNode('')
1519
child.set_maximum_size(100)
1520
child.map(None, ("foo",), "bar")
1521
node.add_node("f", child)
1523
child.set_maximum_size(100)
1524
child.map(None, ("bar",), "baz")
1525
node.add_node("b", child)
1527
for child, node_key_filter in node._iter_nodes(None, key_filter=None):
1528
self.assertEqual(None, node_key_filter)
1530
def test__iter_nodes_splits_key_filter(self):
1531
node = InternalNode('')
1533
child.set_maximum_size(100)
1534
child.map(None, ("foo",), "bar")
1535
node.add_node("f", child)
1537
child.set_maximum_size(100)
1538
child.map(None, ("bar",), "baz")
1539
node.add_node("b", child)
1541
# foo and bar both match exactly one leaf node, but 'cat' should not
1542
# match any, and should not be placed in one.
1543
key_filter = (('foo',), ('bar',), ('cat',))
1544
for child, node_key_filter in node._iter_nodes(None,
1545
key_filter=key_filter):
1546
# each child could only match one key filter, so make sure it was
1548
self.assertEqual(1, len(node_key_filter))
1550
def test__iter_nodes_with_multiple_matches(self):
1551
node = InternalNode('')
1553
child.set_maximum_size(100)
1554
child.map(None, ("foo",), "val")
1555
child.map(None, ("fob",), "val")
1556
node.add_node("f", child)
1558
child.set_maximum_size(100)
1559
child.map(None, ("bar",), "val")
1560
child.map(None, ("baz",), "val")
1561
node.add_node("b", child)
1563
key_filter = (('foo',), ('fob',), ('bar',), ('baz',))
1564
for child, node_key_filter in node._iter_nodes(None,
1565
key_filter=key_filter):
1566
# each child could matches two key filters, so make sure they were
1568
self.assertEqual(2, len(node_key_filter))
1570
def test_iteritems_empty_new(self):
1571
node = InternalNode()
1572
self.assertEqual([], sorted(node.iteritems(None)))
1574
def test_iteritems_two_children(self):
1575
node = InternalNode()
1577
leaf1.map(None, ('foo bar',), 'quux')
1579
leaf2.map(None, ('strange',), 'beast')
1580
node.add_node("f", leaf1)
1581
node.add_node("s", leaf2)
1582
self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
1583
sorted(node.iteritems(None)))
1585
def test_iteritems_two_children_partial(self):
1586
node = InternalNode()
1588
leaf1.map(None, ('foo bar',), 'quux')
1590
leaf2.map(None, ('strange',), 'beast')
1591
node.add_node("f", leaf1)
1592
# This sets up a path that should not be followed - it will error if
1593
# the code tries to.
1594
node._items['f'] = None
1595
node.add_node("s", leaf2)
1596
self.assertEqual([(('strange',), 'beast')],
1597
sorted(node.iteritems(None, [('strange',), ('weird',)])))
1599
def test_iteritems_two_children_with_hash(self):
1600
search_key_func = chk_map.search_key_registry.get('hash-255-way')
1601
node = InternalNode(search_key_func=search_key_func)
1602
leaf1 = LeafNode(search_key_func=search_key_func)
1603
leaf1.map(None, ('foo bar',), 'quux')
1604
leaf2 = LeafNode(search_key_func=search_key_func)
1605
leaf2.map(None, ('strange',), 'beast')
1606
self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
1607
self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
1608
node.add_node("\xbe", leaf1)
1609
# This sets up a path that should not be followed - it will error if
1610
# the code tries to.
1611
node._items['\xbe'] = None
1612
node.add_node("\x85", leaf2)
1613
self.assertEqual([(('strange',), 'beast')],
1614
sorted(node.iteritems(None, [('strange',), ('weird',)])))
1616
def test_iteritems_partial_empty(self):
1617
node = InternalNode()
1618
self.assertEqual([], sorted(node.iteritems([('missing',)])))
1620
def test_map_to_new_child_new(self):
1621
chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
1622
chkmap._ensure_root()
1623
node = chkmap._root_node
1624
# Ensure test validity: nothing paged in below the root.
1626
len([value for value in node._items.values()
1627
if type(value) == tuple]))
1628
# now, mapping to k3 should add a k3 leaf
1629
prefix, nodes = node.map(None, ('k3',), 'quux')
1630
self.assertEqual("k", prefix)
1631
self.assertEqual([("", node)], nodes)
1632
# check new child details
1633
child = node._items['k3']
1634
self.assertIsInstance(child, LeafNode)
1635
self.assertEqual(1, len(child))
1636
self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
1637
self.assertEqual(None, child._key)
1638
self.assertEqual(10, child.maximum_size)
1639
self.assertEqual(1, child._key_width)
1640
# Check overall structure:
1641
self.assertEqual(3, len(chkmap))
1642
self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
1643
self.to_dict(chkmap))
1644
# serialising should only serialise the new data - k3 and the internal
1646
keys = list(node.serialise(chkmap._store))
1647
child_key = child.serialise(chkmap._store)[0]
1648
self.assertEqual([child_key, keys[1]], keys)
1650
def test_map_to_child_child_splits_new(self):
1651
chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
1652
# Check for the canonical root value for this tree:
1653
self.assertEqualDiff("'' InternalNode\n"
1658
, chkmap._dump_tree())
1659
# _dump_tree pages everything in, so reload using just the root
1660
chkmap = CHKMap(chkmap._store, chkmap._root_node)
1661
chkmap._ensure_root()
1662
node = chkmap._root_node
1663
# Ensure test validity: nothing paged in below the root.
1665
len([value for value in node._items.values()
1666
if type(value) == tuple]))
1667
# now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
1668
# k23, which for simplicity in the current implementation generates
1669
# a new internal node between node, and k22/k23.
1670
prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
1671
self.assertEqual("k", prefix)
1672
self.assertEqual([("", node)], nodes)
1673
# check new child details
1674
child = node._items['k2']
1675
self.assertIsInstance(child, InternalNode)
1676
self.assertEqual(2, len(child))
1677
self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
1678
self.to_dict(child, None))
1679
self.assertEqual(None, child._key)
1680
self.assertEqual(10, child.maximum_size)
1681
self.assertEqual(1, child._key_width)
1682
self.assertEqual(3, child._node_width)
1683
# Check overall structure:
1684
self.assertEqual(3, len(chkmap))
1685
self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
1686
self.to_dict(chkmap))
1687
# serialising should only serialise the new data - although k22 hasn't
1688
# changed because its a special corner case (splitting on with only one
1689
# key leaves one node unaltered), in general k22 is serialised, so we
1690
# expect k22, k23, the new internal node, and node, to be serialised.
1691
keys = list(node.serialise(chkmap._store))
1692
child_key = child._key
1693
k22_key = child._items['k22']._key
1694
k23_key = child._items['k23']._key
1695
self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
1696
self.assertEqualDiff("'' InternalNode\n"
1699
" 'k2' InternalNode\n"
1703
" ('k23',) 'quux'\n"
1704
, chkmap._dump_tree())
1706
def test__search_prefix_filter_with_hash(self):
1707
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1708
node = InternalNode(search_key_func=search_key_func)
1710
node._node_width = 4
1711
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
1712
self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
1713
self.assertEqual('E8B7', node._search_prefix_filter(('a',)))
1715
def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
1716
chkmap = self._get_map(
1717
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
1718
# Check we have the expected tree.
1719
self.assertEqualDiff("'' InternalNode\n"
1722
" 'k2' InternalNode\n"
1726
" ('k23',) 'quux'\n"
1727
, chkmap._dump_tree())
1728
chkmap = CHKMap(chkmap._store, chkmap._root_node)
1729
chkmap._ensure_root()
1730
node = chkmap._root_node
1731
# unmapping k23 should give us a root, with k1 and k22 as direct
1733
result = node.unmap(chkmap._store, ('k23',))
1734
# check the pointed-at object within node - k2 should now point at the
1735
# k22 leaf (which has been paged in to see if we can collapse the tree)
1736
child = node._items['k2']
1737
self.assertIsInstance(child, LeafNode)
1738
self.assertEqual(1, len(child))
1739
self.assertEqual({('k22',): 'bar'},
1740
self.to_dict(child, None))
1741
# Check overall structure is instact:
1742
self.assertEqual(2, len(chkmap))
1743
self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
1744
self.to_dict(chkmap))
1745
# serialising should only serialise the new data - the root node.
1746
keys = list(node.serialise(chkmap._store))
1747
self.assertEqual([keys[-1]], keys)
1748
chkmap = CHKMap(chkmap._store, keys[-1])
1749
self.assertEqualDiff("'' InternalNode\n"
1754
, chkmap._dump_tree())
1756
def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
1757
chkmap = self._get_map(
1758
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
1759
self.assertEqualDiff("'' InternalNode\n"
1762
" 'k2' InternalNode\n"
1766
" ('k23',) 'quux'\n"
1767
, chkmap._dump_tree())
1768
orig_root = chkmap._root_node
1769
chkmap = CHKMap(chkmap._store, orig_root)
1770
chkmap._ensure_root()
1771
node = chkmap._root_node
1772
k2_ptr = node._items['k2']
1773
# unmapping k1 should give us a root, with k22 and k23 as direct
1774
# children, and should not have needed to page in the subtree.
1775
result = node.unmap(chkmap._store, ('k1',))
1776
self.assertEqual(k2_ptr, result)
1777
chkmap = CHKMap(chkmap._store, orig_root)
1778
# Unmapping at the CHKMap level should switch to the new root
1779
chkmap.unmap(('k1',))
1780
self.assertEqual(k2_ptr, chkmap._root_node)
1781
self.assertEqualDiff("'' InternalNode\n"
1785
" ('k23',) 'quux'\n"
1786
, chkmap._dump_tree())
1790
# map -> fits - done
1791
# map -> doesn't fit - shrink from left till fits
1792
# key data to return: the common prefix, new nodes.
1794
# unmap -> how to tell if siblings can be combined.
1795
# combing leaf nodes means expanding the prefix to the left; so gather the size of
1796
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
1797
# is an internal node, we know that that is a dense subtree - can't combine.
1798
# otherwise as soon as the sum of serialised values exceeds the split threshold
1799
# we know we can't combine - stop.
1800
# unmap -> key return data - space in node, common prefix length? and key count
1802
# variable length prefixes? -> later start with fixed width to get something going
1803
# map -> fits - update pointer to leaf
1804
# return [prefix and node] - seems sound.
1805
# map -> doesn't fit - find unique prefix and shift right
1806
# create internal nodes for all the partitions, return list of unique
1807
# prefixes and nodes.
1808
# map -> new prefix - create a leaf
1809
# unmap -> if child key count 0, remove
1810
# unmap -> return space in node, common prefix length? (why?), key count
1812
# map, if 1 node returned, use it, otherwise make an internal and populate.
1813
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
1815
# map inits as empty leafnode.
1821
# AA, AB, AC, AD, BA
1822
# packed internal node - ideal:
1823
# AA, AB, AC, AD, BA
1824
# single byte fanout - A,B, AA,AB,AC,AD, BA
1827
# AB - split, but we want to end up with AB, BA, in one node, with
1831
class TestIterInterestingNodes(TestCaseWithStore):
1833
def get_chk_bytes(self):
1834
if getattr(self, '_chk_bytes', None) is None:
1835
self._chk_bytes = super(TestIterInterestingNodes,
1836
self).get_chk_bytes()
1837
return self._chk_bytes
1839
def get_map_key(self, a_dict):
1840
c_map = self._get_map(a_dict, maximum_size=10,
1841
chk_bytes=self.get_chk_bytes())
1844
def assertIterInteresting(self, expected, interesting_keys,
1845
uninteresting_keys):
1846
"""Check the result of iter_interesting_nodes.
1848
:param expected: A list of (record_keys, interesting_chk_pages,
1849
interesting key value pairs)
1851
store = self.get_chk_bytes()
1852
iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
1854
nodes = list(iter_nodes)
1855
for count, (exp, act) in enumerate(izip(expected, nodes)):
1856
exp_record, exp_items = exp
1858
exp_tuple = (exp_record, sorted(exp_items))
1860
act_tuple = (None, sorted(items))
1862
act_tuple = (record.key, sorted(items))
1863
self.assertEqual(exp_tuple, act_tuple,
1864
'entry %d did not match expected' % count)
1865
self.assertEqual(len(expected), len(nodes))
1867
def test_empty_to_one_keys(self):
1868
target = self.get_map_key({('a',): 'content'})
1869
self.assertIterInteresting(
1870
[(target, [(('a',), 'content')]),
1873
def test_none_to_one_key(self):
1874
basis = self.get_map_key({})
1875
target = self.get_map_key({('a',): 'content'})
1876
self.assertIterInteresting(
1877
[(None, [(('a',), 'content')]),
1879
], [target], [basis])
1881
def test_one_to_none_key(self):
1882
basis = self.get_map_key({('a',): 'content'})
1883
target = self.get_map_key({})
1884
self.assertIterInteresting(
1888
def test_common_pages(self):
1889
basis = self.get_map_key({('a',): 'content',
1893
target = self.get_map_key({('a',): 'content',
1894
('b',): 'other content',
1897
target_map = CHKMap(self.get_chk_bytes(), target)
1898
self.assertEqualDiff(
1901
" ('a',) 'content'\n"
1903
" ('b',) 'other content'\n"
1905
" ('c',) 'content'\n",
1906
target_map._dump_tree())
1907
b_key = target_map._root_node._items['b'].key()
1908
# This should return the root node, and the node for the 'b' key
1909
self.assertIterInteresting(
1911
(b_key, [(('b',), 'other content')])],
1914
def test_common_sub_page(self):
1915
basis = self.get_map_key({('aaa',): 'common',
1918
target = self.get_map_key({('aaa',): 'common',
1922
target_map = CHKMap(self.get_chk_bytes(), target)
1923
self.assertEqualDiff(
1925
" 'a' InternalNode\n"
1927
" ('aaa',) 'common'\n"
1931
" ('c',) 'common'\n",
1932
target_map._dump_tree())
1933
# The key for the internal aa node
1934
a_key = target_map._root_node._items['a'].key()
1935
# The key for the leaf aab node
1936
aab_key = target_map._root_node._items['a']._items['aab'].key()
1937
self.assertIterInteresting(
1940
(aab_key, [(('aab',), 'new')])],
1943
def test_common_leaf(self):
1944
basis = self.get_map_key({})
1945
target1 = self.get_map_key({('aaa',): 'common'})
1946
target2 = self.get_map_key({('aaa',): 'common',
1949
target3 = self.get_map_key({('aaa',): 'common',
1953
# The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
1954
# Once as a root node, once as a second layer, and once as a third
1955
# layer. It should only be returned one time regardless
1956
target1_map = CHKMap(self.get_chk_bytes(), target1)
1957
self.assertEqualDiff(
1959
" ('aaa',) 'common'\n",
1960
target1_map._dump_tree())
1961
target2_map = CHKMap(self.get_chk_bytes(), target2)
1962
self.assertEqualDiff(
1965
" ('aaa',) 'common'\n"
1967
" ('bbb',) 'new'\n",
1968
target2_map._dump_tree())
1969
target3_map = CHKMap(self.get_chk_bytes(), target3)
1970
self.assertEqualDiff(
1972
" 'a' InternalNode\n"
1974
" ('aaa',) 'common'\n"
1976
" ('aac',) 'other'\n"
1978
" ('bbb',) 'new'\n",
1979
target3_map._dump_tree())
1980
aaa_key = target1_map._root_node.key()
1981
b_key = target2_map._root_node._items['b'].key()
1982
a_key = target3_map._root_node._items['a'].key()
1983
aac_key = target3_map._root_node._items['a']._items['aac'].key()
1984
self.assertIterInteresting(
1985
[(None, [(('aaa',), 'common')]),
1989
(b_key, [(('bbb',), 'new')]),
1991
(aac_key, [(('aac',), 'other')]),
1992
], [target1, target2, target3], [basis])
1994
self.assertIterInteresting(
1997
(b_key, [(('bbb',), 'new')]),
1999
(aac_key, [(('aac',), 'other')]),
2000
], [target2, target3], [target1])
2002
# This may be a case that we relax. A root node is a deep child of the
2003
# excluded set. The cost is buffering root nodes until we have
2004
# determined all possible exclusions. (Because a prefix of '', cannot
2006
self.assertIterInteresting(
2007
[], [target1], [target3])
2009
def test_multiple_maps(self):
2010
basis1 = self.get_map_key({('aaa',): 'common',
2013
basis2 = self.get_map_key({('bbb',): 'common',
2016
target1 = self.get_map_key({('aaa',): 'common',
2017
('aac',): 'target1',
2020
target2 = self.get_map_key({('aaa',): 'common',
2021
('bba',): 'target2',
2024
target1_map = CHKMap(self.get_chk_bytes(), target1)
2025
self.assertEqualDiff(
2027
" 'a' InternalNode\n"
2029
" ('aaa',) 'common'\n"
2031
" ('aac',) 'target1'\n"
2033
" ('bbb',) 'common'\n",
2034
target1_map._dump_tree())
2035
# The key for the target1 internal a node
2036
a_key = target1_map._root_node._items['a'].key()
2037
# The key for the leaf aac node
2038
aac_key = target1_map._root_node._items['a']._items['aac'].key()
2040
target2_map = CHKMap(self.get_chk_bytes(), target2)
2041
self.assertEqualDiff(
2044
" ('aaa',) 'common'\n"
2045
" 'b' InternalNode\n"
2047
" ('bba',) 'target2'\n"
2049
" ('bbb',) 'common'\n",
2050
target2_map._dump_tree())
2051
# The key for the target2 internal bb node
2052
b_key = target2_map._root_node._items['b'].key()
2053
# The key for the leaf bba node
2054
bba_key = target2_map._root_node._items['b']._items['bba'].key()
2055
self.assertIterInteresting(
2060
(aac_key, [(('aac',), 'target1')]),
2061
(bba_key, [(('bba',), 'target2')]),
2062
], [target1, target2], [basis1, basis2])