/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests for maps built on a CHK versionedfiles facility."""
18
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
19
from bzrlib.chk_map import (
20
    CHKMap,
21
    InternalNode,
22
    LeafNode,
23
    _deserialise,
24
    )
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
25
from bzrlib.tests import TestCaseWithTransport
26
27
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
28
class TestCaseWithStore(TestCaseWithTransport):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
29
30
    def get_chk_bytes(self):
31
        # The eassiest way to get a CHK store is a development3 repository and
32
        # then work with the chk_bytes attribute directly.
33
        repo = self.make_repository(".", format="development3")
34
        repo.lock_write()
35
        self.addCleanup(repo.unlock)
36
        repo.start_write_group()
37
        self.addCleanup(repo.abort_write_group)
38
        return repo.chk_bytes
39
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
40
    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None):
41
        if chk_bytes is None:
42
            chk_bytes = self.get_chk_bytes()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
43
        root_key = CHKMap.from_dict(chk_bytes, a_dict, maximum_size=maximum_size)
44
        chkmap = CHKMap(chk_bytes, root_key)
45
        return chkmap
46
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
47
    def read_bytes(self, chk_bytes, key):
48
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
49
        return stream.next().get_bytes_as("fulltext")
50
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
51
    def to_dict(self, node, *args):
52
        return dict(node.iteritems(*args))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
53
54
55
class TestMap(TestCaseWithStore):
56
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
57
    def assertHasABMap(self, chk_bytes):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
58
        root_key = ('sha1:f14dd34def95036bc06bb5c0ed95437d7383a04a',)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
59
        self.assertEqual(
3735.2.24 by Robert Collins
test_chk_map tests all passing.
60
            'chkleaf:\n0\n1\n1\na\x00b\n',
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
61
            self.read_bytes(chk_bytes, root_key))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
62
        return root_key
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
63
64
    def assertHasEmptyMap(self, chk_bytes):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
65
        root_key = ('sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779',)
66
        self.assertEqual("chkleaf:\n0\n1\n0\n", self.read_bytes(chk_bytes, root_key))
67
        return root_key
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
68
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
69
    def test_from_dict_empty(self):
70
        chk_bytes = self.get_chk_bytes()
71
        root_key = CHKMap.from_dict(chk_bytes, {})
3735.2.24 by Robert Collins
test_chk_map tests all passing.
72
        # Check the data was saved and inserted correctly.
73
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
74
        self.assertEqual(expected_root_key, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
75
76
    def test_from_dict_ab(self):
77
        chk_bytes = self.get_chk_bytes()
78
        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
3735.2.24 by Robert Collins
test_chk_map tests all passing.
79
        # Check the data was saved and inserted correctly.
80
        expected_root_key = self.assertHasABMap(chk_bytes)
81
        self.assertEqual(expected_root_key, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
82
83
    def test_apply_empty_ab(self):
84
        # applying a delta (None, "a", "b") to an empty chkmap generates the
85
        # same map as from_dict_ab.
86
        chk_bytes = self.get_chk_bytes()
87
        root_key = CHKMap.from_dict(chk_bytes, {})
88
        chkmap = CHKMap(chk_bytes, root_key)
89
        new_root = chkmap.apply_delta([(None, "a", "b")])
3735.2.24 by Robert Collins
test_chk_map tests all passing.
90
        # Check the data was saved and inserted correctly.
91
        expected_root_key = self.assertHasABMap(chk_bytes)
92
        self.assertEqual(expected_root_key, new_root)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
93
        # The update should have left us with an in memory root node, with an
94
        # updated key.
95
        self.assertEqual(new_root, chkmap._root_node._key)
96
97
    def test_apply_ab_empty(self):
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
98
        # applying a delta ("a", None, None) to a map with 'a' in it generates
99
        # an empty map.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
100
        chk_bytes = self.get_chk_bytes()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
101
        root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
102
        chkmap = CHKMap(chk_bytes, root_key)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
103
        new_root = chkmap.apply_delta([(("a",), None, None)])
104
        # Check the data was saved and inserted correctly.
105
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
106
        self.assertEqual(expected_root_key, new_root)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
107
        # The update should have left us with an in memory root node, with an
108
        # updated key.
109
        self.assertEqual(new_root, chkmap._root_node._key)
110
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
111
    def test_iter_changes_empty_ab(self):
112
        # Asking for changes between an empty dict to a dict with keys returns
113
        # all the keys.
3735.2.31 by Robert Collins
CHKMap.iter_changes
114
        basis = self._get_map({}, maximum_size=10)
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
115
        target = self._get_map(
116
            {('a',): 'content here', ('b',): 'more content'},
3735.2.31 by Robert Collins
CHKMap.iter_changes
117
            chk_bytes=basis._store, maximum_size=10)
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
118
        self.assertEqual([(('a',), None, 'content here'),
3735.2.31 by Robert Collins
CHKMap.iter_changes
119
            (('b',), None, 'more content')],
120
            sorted(list(target.iter_changes(basis))))
121
122
    def test_iter_changes_ab_empty(self):
123
        # Asking for changes between a dict with keys to an empty dict returns
124
        # all the keys.
125
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
126
            maximum_size=10)
127
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
128
        self.assertEqual([(('a',), 'content here', None),
129
            (('b',), 'more content', None)],
130
            sorted(list(target.iter_changes(basis))))
131
132
    def test_iter_changes_empty_empty_is_empty(self):
133
        basis = self._get_map({}, maximum_size=10)
134
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
135
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
136
137
    def test_iter_changes_ab_ab_is_empty(self):
138
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
139
            maximum_size=10)
140
        target = self._get_map(
141
            {('a',): 'content here', ('b',): 'more content'},
142
            chk_bytes=basis._store, maximum_size=10)
143
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
144
145
    def test_iter_changes_ab_ab_nodes_not_loaded(self):
146
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
147
            maximum_size=10)
148
        target = self._get_map(
149
            {('a',): 'content here', ('b',): 'more content'},
150
            chk_bytes=basis._store, maximum_size=10)
151
        list(target.iter_changes(basis))
152
        self.assertIsInstance(target._root_node, tuple)
153
        self.assertIsInstance(basis._root_node, tuple)
154
155
    def test_iter_changes_ab_ab_changed_values_shown(self):
156
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
157
            maximum_size=10)
158
        target = self._get_map(
159
            {('a',): 'content here', ('b',): 'different content'},
160
            chk_bytes=basis._store, maximum_size=10)
161
        result = sorted(list(target.iter_changes(basis)))
162
        self.assertEqual([(('b',), 'more content', 'different content')],
163
            result)
164
165
    def test_iter_changes_mixed_node_length(self):
166
        # When one side has different node lengths than the other, common
167
        # but different keys still need to be show, and new-and-old included
168
        # appropriately.
169
        # aaa - common unaltered
170
        # aab - common altered
171
        # b - basis only
172
        # at - target only
173
        # we expect: 
174
        # aaa to be not loaded (later test)
175
        # aab, b, at to be returned.
176
        # basis splits at byte 0,1,2, aaa is commonb is basis only
177
        basis_dict = {('aaa',): 'foo bar',
178
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
179
        # target splits at byte 1,2, at is target only
180
        target_dict = {('aaa',): 'foo bar',
181
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
182
        changes = [
183
            (('aab',), 'common altered a', 'common altered b'),
184
            (('at',), None, 'foo bar t'),
185
            (('b',), 'foo bar b', None),
186
            ]
187
        basis = self._get_map(basis_dict, maximum_size=10)
188
        target = self._get_map(target_dict, maximum_size=10,
189
            chk_bytes=basis._store)
190
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
191
192
    def test_iter_changes_common_pages_not_loaded(self):
193
        # aaa - common unaltered
194
        # aab - common altered
195
        # b - basis only
196
        # at - target only
197
        # we expect: 
198
        # aaa to be not loaded
199
        # aaa not to be in result.
200
        basis_dict = {('aaa',): 'foo bar',
201
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
202
        # target splits at byte 1, at is target only
203
        target_dict = {('aaa',): 'foo bar',
204
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
205
        basis = self._get_map(basis_dict, maximum_size=10)
206
        target = self._get_map(target_dict, maximum_size=10,
207
            chk_bytes=basis._store)
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
208
        basis_get = basis._store.get_record_stream
209
        def get_record_stream(keys, order, fulltext):
210
            if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
211
                self.fail("'aaa' pointer was followed %r" % keys)
212
            return basis_get(keys, order, fulltext)
213
        basis._store.get_record_stream = get_record_stream
3735.2.31 by Robert Collins
CHKMap.iter_changes
214
        result = sorted(list(target.iter_changes(basis)))
215
        for change in result:
216
            if change[0] == ('aaa',):
217
                self.fail("Found unexpected change: %s" % change)
218
219
    def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
220
        # Within a leaf there are no hash's to exclude keys, make sure multi
221
        # value leaf nodes are handled well.
222
        basis_dict = {('aaa',): 'foo bar',
223
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
224
        target_dict = {('aaa',): 'foo bar',
225
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
226
        changes = [
227
            (('aab',), 'common altered a', 'common altered b'),
228
            (('at',), None, 'foo bar t'),
229
            (('b',), 'foo bar b', None),
230
            ]
231
        basis = self._get_map(basis_dict)
232
        target = self._get_map(target_dict, chk_bytes=basis._store)
233
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
234
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
235
    def test_iteritems_empty(self):
236
        chk_bytes = self.get_chk_bytes()
237
        root_key = CHKMap.from_dict(chk_bytes, {})
238
        chkmap = CHKMap(chk_bytes, root_key)
239
        self.assertEqual([], list(chkmap.iteritems()))
240
241
    def test_iteritems_two_items(self):
242
        chk_bytes = self.get_chk_bytes()
243
        root_key = CHKMap.from_dict(chk_bytes,
244
            {"a":"content here", "b":"more content"})
245
        chkmap = CHKMap(chk_bytes, root_key)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
246
        self.assertEqual([(("a",), "content here"), (("b",), "more content")],
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
247
            sorted(list(chkmap.iteritems())))
248
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
249
    def test_iteritems_selected_one_of_two_items(self):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
250
        chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
251
        self.assertEqual({("a",): "content here"},
252
            self.to_dict(chkmap, [("a",)]))
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
253
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
254
    def test___len__empty(self):
255
        chkmap = self._get_map({})
256
        self.assertEqual(0, len(chkmap))
257
258
    def test___len__2(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
259
        chkmap = self._get_map({"foo":"bar", "gam":"quux"})
260
        self.assertEqual(2, len(chkmap))
261
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
262
    def test_max_size_100_bytes_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
263
        # When there is a 100 byte upper node limit, a tree is formed.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
264
        chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
265
        # We expect three nodes:
266
        # A root, with two children, and with two key prefixes - k1 to one, and
267
        # k2 to the other as our node splitting is only just being developed.
268
        # The maximum size should be embedded
269
        chkmap._ensure_root()
270
        self.assertEqual(100, chkmap._root_node.maximum_size)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
271
        self.assertEqual(1, chkmap._root_node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
272
        # There should be two child nodes, and prefix of 2(bytes):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
273
        self.assertEqual(2, len(chkmap._root_node._items))
274
        self.assertEqual("k", chkmap._root_node.unique_serialised_prefix())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
275
        # The actual nodes pointed at will change as serialisers change; so
276
        # here we test that the key prefix is correct; then load the nodes and
277
        # check they have the right pointed at key; whether they have the
278
        # pointed at value inline or not is also unrelated to this test so we
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
279
        # don't check that in detail - rather we just check the aggregate
280
        # value.
281
        nodes = sorted(chkmap._root_node._items.items())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
282
        ptr1 = nodes[0]
283
        ptr2 = nodes[1]
284
        self.assertEqual('k1', ptr1[0])
285
        self.assertEqual('k2', ptr2[0])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
286
        node1 = _deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1])
287
        self.assertIsInstance(node1, LeafNode)
288
        self.assertEqual(1, len(node1))
289
        self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
290
        node2 = _deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1])
291
        self.assertIsInstance(node2, LeafNode)
292
        self.assertEqual(1, len(node2))
293
        self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
294
        # Having checked we have a good structure, check that the content is
295
        # still accessible.
296
        self.assertEqual(2, len(chkmap))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
297
        self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
298
            self.to_dict(chkmap))
299
300
    def test_init_root_is_LeafNode_new(self):
301
        chk_bytes = self.get_chk_bytes()
302
        chkmap = CHKMap(chk_bytes, None)
303
        self.assertIsInstance(chkmap._root_node, LeafNode)
304
        self.assertEqual({}, self.to_dict(chkmap))
305
        self.assertEqual(0, len(chkmap))
306
307
    def test_init_and_save_new(self):
308
        chk_bytes = self.get_chk_bytes()
309
        chkmap = CHKMap(chk_bytes, None)
310
        key = chkmap._save()
311
        leaf_node = LeafNode()
312
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
313
314
    def test_map_first_item_new(self):
315
        chk_bytes = self.get_chk_bytes()
316
        chkmap = CHKMap(chk_bytes, None)
317
        chkmap.map(("foo,",), "bar")
318
        self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
319
        self.assertEqual(1, len(chkmap))
320
        key = chkmap._save()
321
        leaf_node = LeafNode()
322
        leaf_node.map(chk_bytes, ("foo,",), "bar")
323
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
324
325
    def test_unmap_last_item_root_is_leaf_new(self):
326
        chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
327
        chkmap.unmap(("k1"*50,))
328
        chkmap.unmap(("k2"*50,))
329
        self.assertEqual(0, len(chkmap))
330
        self.assertEqual({}, self.to_dict(chkmap))
331
        key = chkmap._save()
332
        leaf_node = LeafNode()
333
        self.assertEqual([key], leaf_node.serialise(chkmap._store))
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
334
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
335
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
336
class TestLeafNode(TestCaseWithStore):
337
338
    def test_current_size_empty(self):
339
        node = LeafNode()
340
        self.assertEqual(15, node._current_size())
341
342
    def test_current_size_size_changed(self):
343
        node = LeafNode()
344
        node.set_maximum_size(10)
345
        self.assertEqual(16, node._current_size())
346
347
    def test_current_size_width_changed(self):
348
        node = LeafNode()
349
        node._key_width = 10
350
        self.assertEqual(16, node._current_size())
351
352
    def test_current_size_items(self):
353
        node = LeafNode()
354
        base_size = node._current_size()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
355
        node.map(None, ("foo bar",), "baz")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
356
        self.assertEqual(base_size + 12, node._current_size())
357
358
    def test_deserialise_empty(self):
359
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
360
        self.assertEqual(0, len(node))
361
        self.assertEqual(10, node.maximum_size)
362
        self.assertEqual(("sha1:1234",), node.key())
363
364
    def test_deserialise_items(self):
365
        node = LeafNode.deserialise(
366
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
367
        self.assertEqual(2, len(node))
368
        self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
369
            sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
370
3735.2.25 by Robert Collins
CHKInventory core tests passing.
371
    def test_deserialise_item_with_null_width_1(self):
372
        node = LeafNode.deserialise(
373
            "chkleaf:\n0\n1\n2\nfoo\x00bar\x00baz\nquux\x00blarh\n",
374
            ("sha1:1234",))
375
        self.assertEqual(2, len(node))
376
        self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
377
            sorted(node.iteritems(None)))
378
379
    def test_deserialise_item_with_null_width_2(self):
380
        node = LeafNode.deserialise(
381
            "chkleaf:\n0\n2\n2\nfoo\x001\x00bar\x00baz\nquux\x00\x00blarh\n",
382
            ("sha1:1234",))
383
        self.assertEqual(2, len(node))
384
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
385
            sorted(node.iteritems(None)))
386
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
387
    def test_iteritems_selected_one_of_two_items(self):
388
        node = LeafNode.deserialise(
389
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
390
        self.assertEqual(2, len(node))
391
        self.assertEqual([(("quux",), "blarh")],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
392
            sorted(node.iteritems(None, [("quux",), ("qaz",)])))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
393
394
    def test_key_new(self):
395
        node = LeafNode()
396
        self.assertEqual(None, node.key())
397
398
    def test_key_after_map(self):
399
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
400
        node.map(None, ("foo bar",), "baz quux")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
401
        self.assertEqual(None, node.key())
402
403
    def test_key_after_unmap(self):
404
        node = LeafNode.deserialise(
405
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
406
        node.unmap(None, ("foo bar",))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
407
        self.assertEqual(None, node.key())
408
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
409
    def test_map_exceeding_max_size_only_entry_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
410
        node = LeafNode()
411
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
412
        result = node.map(None, ("foo bar",), "baz quux")
413
        self.assertEqual(("foo bar", [("", node)]), result)
414
        self.assertTrue(10 < node._current_size())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
415
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
416
    def test_map_exceeding_max_size_second_entry_early_difference_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
417
        node = LeafNode()
418
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
419
        node.map(None, ("foo bar",), "baz quux")
420
        prefix, result = list(node.map(None, ("blue",), "red"))
421
        self.assertEqual("", prefix)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
422
        self.assertEqual(2, len(result))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
423
        split_chars = set([result[0][0], result[1][0]])
424
        self.assertEqual(set(["f", "b"]), split_chars)
425
        nodes = dict(result)
426
        node = nodes["f"]
427
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
428
        self.assertEqual(10, node.maximum_size)
429
        self.assertEqual(1, node._key_width)
430
        node = nodes["b"]
431
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
432
        self.assertEqual(10, node.maximum_size)
433
        self.assertEqual(1, node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
434
435
    def test_map_first(self):
436
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
437
        result = node.map(None, ("foo bar",), "baz quux")
438
        self.assertEqual(("foo bar", [("", node)]), result)
439
        self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
440
        self.assertEqual(1, len(node))
441
442
    def test_map_second(self):
443
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
444
        node.map(None, ("foo bar",), "baz quux")
445
        result = node.map(None, ("bingo",), "bango")
446
        self.assertEqual(("", [("", node)]), result)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
447
        self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
3735.2.24 by Robert Collins
test_chk_map tests all passing.
448
            self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
449
        self.assertEqual(2, len(node))
450
451
    def test_map_replacement(self):
452
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
453
        node.map(None, ("foo bar",), "baz quux")
454
        result = node.map(None, ("foo bar",), "bango")
455
        self.assertEqual(("foo bar", [("", node)]), result)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
456
        self.assertEqual({("foo bar",): "bango"},
3735.2.24 by Robert Collins
test_chk_map tests all passing.
457
            self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
458
        self.assertEqual(1, len(node))
459
460
    def test_serialise_empty(self):
461
        store = self.get_chk_bytes()
462
        node = LeafNode()
463
        node.set_maximum_size(10)
464
        expected_key = ("sha1:62cc3565b48b0e830216e652cf99c6bd6b05b4b9",)
465
        self.assertEqual([expected_key],
466
            list(node.serialise(store)))
467
        self.assertEqual("chkleaf:\n10\n1\n0\n", self.read_bytes(store, expected_key))
468
        self.assertEqual(expected_key, node.key())
469
470
    def test_serialise_items(self):
471
        store = self.get_chk_bytes()
472
        node = LeafNode()
473
        node.set_maximum_size(10)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
474
        node.map(None, ("foo bar",), "baz quux")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
475
        expected_key = ("sha1:d44cb6f0299b7e047da7f9e98f810e98f1dce1a7",)
476
        self.assertEqual([expected_key],
477
            list(node.serialise(store)))
478
        self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\x00baz quux\n",
479
            self.read_bytes(store, expected_key))
480
        self.assertEqual(expected_key, node.key())
481
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
482
    def test_unique_serialised_prefix_empty_new(self):
483
        node = LeafNode()
484
        self.assertEqual("", node.unique_serialised_prefix())
485
486
    def test_unique_serialised_prefix_one_item_new(self):
487
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
488
        node.map(None, ("foo bar", "baz"), "baz quux")
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
489
        self.assertEqual("foo bar\x00baz", node.unique_serialised_prefix())
490
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
491
    def test_unmap_missing(self):
492
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
493
        self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
494
495
    def test_unmap_present(self):
496
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
497
        node.map(None, ("foo bar",), "baz quux")
498
        result = node.unmap(None, ("foo bar",))
499
        self.assertEqual(node, result)
500
        self.assertEqual({}, self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
501
        self.assertEqual(0, len(node))
502
503
504
class TestInternalNode(TestCaseWithStore):
505
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
506
    def test_add_node_empty_new(self):
507
        node = InternalNode()
508
        child = LeafNode()
509
        child.set_maximum_size(100)
510
        child.map(None, ("foo",), "bar")
511
        node.add_node("foo", child)
512
        # Note that node isn't strictly valid now as a tree (only one child),
513
        # but thats ok for this test.
514
        # The first child defines the node's width:
515
        self.assertEqual(3, node._node_width)
516
        # We should be able to iterate over the contents without doing IO.
517
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
518
        # The length should be known:
519
        self.assertEqual(1, len(node))
520
        # serialising the node should serialise the child and the node.
521
        chk_bytes = self.get_chk_bytes()
522
        keys = list(node.serialise(chk_bytes))
523
        child_key = child.serialise(chk_bytes)[0]
524
        self.assertEqual(
525
            [child_key, ('sha1:db23b260c2bf46bf7446c39f91668900a2491610',)],
526
            keys)
527
        # We should be able to access deserialised content.
528
        bytes = self.read_bytes(chk_bytes, keys[1])
529
        node = _deserialise(bytes, keys[1])
530
        self.assertEqual(1, len(node))
531
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
532
        self.assertEqual(3, node._node_width)
533
534
    def test_add_node_resets_key_new(self):
535
        node = InternalNode()
536
        child = LeafNode()
537
        child.set_maximum_size(100)
538
        child.map(None, ("foo",), "bar")
539
        node.add_node("foo", child)
540
        chk_bytes = self.get_chk_bytes()
541
        keys = list(node.serialise(chk_bytes))
542
        self.assertEqual(keys[1], node._key)
543
        node.add_node("fos", child)
544
        self.assertEqual(None, node._key)
545
546
#    def test_add_node_empty_oversized_one_ok_new(self):
547
#    def test_add_node_one_oversized_second_kept_minimum_fan(self):
548
#    def test_add_node_two_oversized_third_kept_minimum_fan(self):
549
#    def test_add_node_one_oversized_second_splits_errors(self):
550
551
    def test_iteritems_empty_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
552
        node = InternalNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
553
        self.assertEqual([], sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
554
555
    def test_iteritems_two_children(self):
556
        node = InternalNode()
557
        leaf1 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
558
        leaf1.map(None, ('foo bar',), 'quux')
559
        leaf2 = LeafNode()
560
        leaf2.map(None, ('strange',), 'beast')
3735.2.24 by Robert Collins
test_chk_map tests all passing.
561
        node.add_node("f", leaf1)
562
        node.add_node("s", leaf2)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
563
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
564
            sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
565
566
    def test_iteritems_two_children_partial(self):
567
        node = InternalNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
568
        leaf1 = LeafNode()
569
        leaf1.map(None, ('foo bar',), 'quux')
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
570
        leaf2 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
571
        leaf2.map(None, ('strange',), 'beast')
3735.2.24 by Robert Collins
test_chk_map tests all passing.
572
        node.add_node("f", leaf1)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
573
        # This sets up a path that should not be followed - it will error if
574
        # the code tries to.
3735.2.24 by Robert Collins
test_chk_map tests all passing.
575
        node._items['f'] = None
576
        node.add_node("s", leaf2)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
577
        self.assertEqual([(('strange',), 'beast')],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
578
            sorted(node.iteritems(None, [('strange',), ('weird',)])))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
579
580
    def test_iteritems_partial_empty(self):
581
        node = InternalNode()
582
        self.assertEqual([], sorted(node.iteritems([('missing',)])))
583
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
584
    def test_map_to_new_child_new(self):
585
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
586
        chkmap._ensure_root()
587
        node = chkmap._root_node
588
        # Ensure test validity: nothing paged in below the root.
589
        self.assertEqual(2,
590
            len([value for value in node._items.values()
591
                if type(value) == tuple]))
592
        # now, mapping to k3 should add a k3 leaf
593
        prefix, nodes = node.map(None, ('k3',), 'quux')
594
        self.assertEqual("k", prefix)
595
        self.assertEqual([("", node)], nodes)
596
        # check new child details
597
        child = node._items['k3']
598
        self.assertIsInstance(child, LeafNode)
599
        self.assertEqual(1, len(child))
600
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
601
        self.assertEqual(None, child._key)
602
        self.assertEqual(10, child.maximum_size)
603
        self.assertEqual(1, child._key_width)
604
        # Check overall structure:
605
        self.assertEqual(3, len(chkmap))
606
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
607
            self.to_dict(chkmap))
608
        # serialising should only serialise the new data - k3 and the internal
609
        # node.
610
        keys = list(node.serialise(chkmap._store))
611
        child_key = child.serialise(chkmap._store)[0]
612
        self.assertEqual([child_key, keys[1]], keys)
613
614
    def test_map_to_child_child_splits_new(self):
615
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
616
        # Check for the canonical root value for this tree:
617
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',),
618
            chkmap._root_node)
619
        chkmap._ensure_root()
620
        node = chkmap._root_node
621
        # Ensure test validity: nothing paged in below the root.
622
        self.assertEqual(2,
623
            len([value for value in node._items.values()
624
                if type(value) == tuple]))
625
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
626
        # k23, which for simplicity in the current implementation generates
627
        # a new internal node between node, and k22/k23.
628
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
629
        self.assertEqual("k", prefix)
630
        self.assertEqual([("", node)], nodes)
631
        # check new child details
632
        child = node._items['k2']
633
        self.assertIsInstance(child, InternalNode)
634
        self.assertEqual(2, len(child))
635
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
636
            self.to_dict(child, None))
637
        self.assertEqual(None, child._key)
638
        self.assertEqual(10, child.maximum_size)
639
        self.assertEqual(1, child._key_width)
640
        self.assertEqual(3, child._node_width)
641
        # Check overall structure:
642
        self.assertEqual(3, len(chkmap))
643
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
644
            self.to_dict(chkmap))
645
        # serialising should only serialise the new data - although k22 hasn't
646
        # changed because its a special corner case (splitting on with only one
647
        # key leaves one node unaltered), in general k22 is serialised, so we
648
        # expect k22, k23, the new internal node, and node, to be serialised.
649
        keys = list(node.serialise(chkmap._store))
650
        child_key = child._key
651
        k22_key = child._items['k22']._key
652
        k23_key = child._items['k23']._key
653
        self.assertEqual([k22_key, k23_key, child_key, keys[-1]], keys)
654
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
655
            keys[-1])
656
657
    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
658
        chkmap = self._get_map(
659
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
660
        # Check we have the expected tree.
661
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
662
            chkmap._root_node)
663
        chkmap._ensure_root()
664
        node = chkmap._root_node
665
        # unmapping k23 should give us a root, with k1 and k22 as direct
666
        # children.
667
        result = node.unmap(chkmap._store, ('k23',))
668
        # check the pointed-at object within node - k2 should now point at the
669
        # k22 leaf (which should not even have been paged in).
670
        ptr = node._items['k2']
671
        self.assertIsInstance(ptr, tuple)
672
        child = _deserialise(self.read_bytes(chkmap._store, ptr), ptr)
673
        self.assertIsInstance(child, LeafNode)
674
        self.assertEqual(1, len(child))
675
        self.assertEqual({('k22',): 'bar'},
676
            self.to_dict(child, None))
677
        # Check overall structure is instact:
678
        self.assertEqual(2, len(chkmap))
679
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
680
            self.to_dict(chkmap))
681
        # serialising should only serialise the new data - the root node.
682
        keys = list(node.serialise(chkmap._store))
683
        self.assertEqual([keys[-1]], keys)
684
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',), keys[-1])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
685
3735.2.23 by Robert Collins
Test unmapping with one child left but multiple keys.
686
    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
687
        chkmap = self._get_map(
688
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
689
        # Check we have the expected tree.
690
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
691
            chkmap._root_node)
692
        chkmap._ensure_root()
693
        node = chkmap._root_node
694
        k2_ptr = node._items['k2']
695
        # unmapping k21 should give us a root, with k22 and k23 as direct
696
        # children, and should not have needed to page in the subtree.
697
        result = node.unmap(chkmap._store, ('k1',))
698
        self.assertEqual(k2_ptr, result)
699
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
700
701
# leaf:
702
# map -> fits - done
703
# map -> doesn't fit - shrink from left till fits
704
#        key data to return: the common prefix, new nodes.
705
706
# unmap -> how to tell if siblings can be combined.
707
#          combing leaf nodes means expanding the prefix to the left; so gather the size of
708
#          all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
709
#          is an internal node, we know that that is a dense subtree - can't combine.
710
#          otherwise as soon as the sum of serialised values exceeds the split threshold
711
#          we know we can't combine - stop.
712
# unmap -> key return data - space in node, common prefix length? and key count
713
# internal: 
714
# variable length prefixes? -> later start with fixed width to get something going
715
# map -> fits - update pointer to leaf
716
#        return [prefix and node] - seems sound.
717
# map -> doesn't fit - find unique prefix and shift right
718
#        create internal nodes for all the partitions, return list of unique
719
#        prefixes and nodes.
720
# map -> new prefix - create a leaf
721
# unmap -> if child key count 0, remove
722
# unmap -> return space in node, common prefix length? (why?), key count
723
# map:
724
# map, if 1 node returned, use it, otherwise make an internal and populate.
725
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
726
# code)
727
# map inits as empty leafnode.
728
# tools: 
729
# visualiser
730
731
732
# how to handle:
733
# AA, AB, AC, AD, BA
734
# packed internal node - ideal:
735
# AA, AB, AC, AD, BA
736
# single byte fanout - A,B,   AA,AB,AC,AD,     BA
737
# build order's:
738
# BA
739
# AB - split, but we want to end up with AB, BA, in one node, with 
740
# 1-4K get0