/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests for maps built on a CHK versionedfiles facility."""
18
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
19
from bzrlib.chk_map import (
20
    CHKMap,
21
    InternalNode,
22
    LeafNode,
23
    _deserialise,
24
    )
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
25
from bzrlib.tests import TestCaseWithTransport
26
27
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
28
class TestCaseWithStore(TestCaseWithTransport):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
29
30
    def get_chk_bytes(self):
31
        # The eassiest way to get a CHK store is a development3 repository and
32
        # then work with the chk_bytes attribute directly.
33
        repo = self.make_repository(".", format="development3")
34
        repo.lock_write()
35
        self.addCleanup(repo.unlock)
36
        repo.start_write_group()
37
        self.addCleanup(repo.abort_write_group)
38
        return repo.chk_bytes
39
3735.2.43 by Robert Collins
Teach CHKMap how to iter items in 2-tuple keyspaces.
40
    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1):
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
41
        if chk_bytes is None:
42
            chk_bytes = self.get_chk_bytes()
3735.2.43 by Robert Collins
Teach CHKMap how to iter items in 2-tuple keyspaces.
43
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
44
            maximum_size=maximum_size, key_width=key_width)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
45
        chkmap = CHKMap(chk_bytes, root_key)
46
        return chkmap
47
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
48
    def read_bytes(self, chk_bytes, key):
49
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
50
        return stream.next().get_bytes_as("fulltext")
51
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
52
    def to_dict(self, node, *args):
53
        return dict(node.iteritems(*args))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
54
55
56
class TestMap(TestCaseWithStore):
57
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
58
    def assertHasABMap(self, chk_bytes):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
59
        root_key = ('sha1:f14dd34def95036bc06bb5c0ed95437d7383a04a',)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
60
        self.assertEqual(
3735.2.24 by Robert Collins
test_chk_map tests all passing.
61
            'chkleaf:\n0\n1\n1\na\x00b\n',
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
62
            self.read_bytes(chk_bytes, root_key))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
63
        return root_key
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
64
65
    def assertHasEmptyMap(self, chk_bytes):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
66
        root_key = ('sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779',)
67
        self.assertEqual("chkleaf:\n0\n1\n0\n", self.read_bytes(chk_bytes, root_key))
68
        return root_key
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
69
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
70
    def test_from_dict_empty(self):
71
        chk_bytes = self.get_chk_bytes()
72
        root_key = CHKMap.from_dict(chk_bytes, {})
3735.2.24 by Robert Collins
test_chk_map tests all passing.
73
        # Check the data was saved and inserted correctly.
74
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
75
        self.assertEqual(expected_root_key, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
76
77
    def test_from_dict_ab(self):
78
        chk_bytes = self.get_chk_bytes()
79
        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
3735.2.24 by Robert Collins
test_chk_map tests all passing.
80
        # Check the data was saved and inserted correctly.
81
        expected_root_key = self.assertHasABMap(chk_bytes)
82
        self.assertEqual(expected_root_key, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
83
84
    def test_apply_empty_ab(self):
85
        # applying a delta (None, "a", "b") to an empty chkmap generates the
86
        # same map as from_dict_ab.
87
        chk_bytes = self.get_chk_bytes()
88
        root_key = CHKMap.from_dict(chk_bytes, {})
89
        chkmap = CHKMap(chk_bytes, root_key)
90
        new_root = chkmap.apply_delta([(None, "a", "b")])
3735.2.24 by Robert Collins
test_chk_map tests all passing.
91
        # Check the data was saved and inserted correctly.
92
        expected_root_key = self.assertHasABMap(chk_bytes)
93
        self.assertEqual(expected_root_key, new_root)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
94
        # The update should have left us with an in memory root node, with an
95
        # updated key.
96
        self.assertEqual(new_root, chkmap._root_node._key)
97
98
    def test_apply_ab_empty(self):
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
99
        # applying a delta ("a", None, None) to a map with 'a' in it generates
100
        # an empty map.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
101
        chk_bytes = self.get_chk_bytes()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
102
        root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
103
        chkmap = CHKMap(chk_bytes, root_key)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
104
        new_root = chkmap.apply_delta([(("a",), None, None)])
105
        # Check the data was saved and inserted correctly.
106
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
107
        self.assertEqual(expected_root_key, new_root)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
108
        # The update should have left us with an in memory root node, with an
109
        # updated key.
110
        self.assertEqual(new_root, chkmap._root_node._key)
111
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
112
    def test_iter_changes_empty_ab(self):
113
        # Asking for changes between an empty dict to a dict with keys returns
114
        # all the keys.
3735.2.31 by Robert Collins
CHKMap.iter_changes
115
        basis = self._get_map({}, maximum_size=10)
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
116
        target = self._get_map(
117
            {('a',): 'content here', ('b',): 'more content'},
3735.2.31 by Robert Collins
CHKMap.iter_changes
118
            chk_bytes=basis._store, maximum_size=10)
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
119
        self.assertEqual([(('a',), None, 'content here'),
3735.2.31 by Robert Collins
CHKMap.iter_changes
120
            (('b',), None, 'more content')],
121
            sorted(list(target.iter_changes(basis))))
122
123
    def test_iter_changes_ab_empty(self):
124
        # Asking for changes between a dict with keys to an empty dict returns
125
        # all the keys.
126
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
127
            maximum_size=10)
128
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
129
        self.assertEqual([(('a',), 'content here', None),
130
            (('b',), 'more content', None)],
131
            sorted(list(target.iter_changes(basis))))
132
133
    def test_iter_changes_empty_empty_is_empty(self):
134
        basis = self._get_map({}, maximum_size=10)
135
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
136
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
137
138
    def test_iter_changes_ab_ab_is_empty(self):
139
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
140
            maximum_size=10)
141
        target = self._get_map(
142
            {('a',): 'content here', ('b',): 'more content'},
143
            chk_bytes=basis._store, maximum_size=10)
144
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
145
146
    def test_iter_changes_ab_ab_nodes_not_loaded(self):
147
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
148
            maximum_size=10)
149
        target = self._get_map(
150
            {('a',): 'content here', ('b',): 'more content'},
151
            chk_bytes=basis._store, maximum_size=10)
152
        list(target.iter_changes(basis))
153
        self.assertIsInstance(target._root_node, tuple)
154
        self.assertIsInstance(basis._root_node, tuple)
155
156
    def test_iter_changes_ab_ab_changed_values_shown(self):
157
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
158
            maximum_size=10)
159
        target = self._get_map(
160
            {('a',): 'content here', ('b',): 'different content'},
161
            chk_bytes=basis._store, maximum_size=10)
162
        result = sorted(list(target.iter_changes(basis)))
163
        self.assertEqual([(('b',), 'more content', 'different content')],
164
            result)
165
166
    def test_iter_changes_mixed_node_length(self):
167
        # When one side has different node lengths than the other, common
168
        # but different keys still need to be show, and new-and-old included
169
        # appropriately.
170
        # aaa - common unaltered
171
        # aab - common altered
172
        # b - basis only
173
        # at - target only
174
        # we expect: 
175
        # aaa to be not loaded (later test)
176
        # aab, b, at to be returned.
177
        # basis splits at byte 0,1,2, aaa is commonb is basis only
178
        basis_dict = {('aaa',): 'foo bar',
179
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
180
        # target splits at byte 1,2, at is target only
181
        target_dict = {('aaa',): 'foo bar',
182
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
183
        changes = [
184
            (('aab',), 'common altered a', 'common altered b'),
185
            (('at',), None, 'foo bar t'),
186
            (('b',), 'foo bar b', None),
187
            ]
188
        basis = self._get_map(basis_dict, maximum_size=10)
189
        target = self._get_map(target_dict, maximum_size=10,
190
            chk_bytes=basis._store)
191
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
192
193
    def test_iter_changes_common_pages_not_loaded(self):
194
        # aaa - common unaltered
195
        # aab - common altered
196
        # b - basis only
197
        # at - target only
198
        # we expect: 
199
        # aaa to be not loaded
200
        # aaa not to be in result.
201
        basis_dict = {('aaa',): 'foo bar',
202
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
203
        # target splits at byte 1, at is target only
204
        target_dict = {('aaa',): 'foo bar',
205
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
206
        basis = self._get_map(basis_dict, maximum_size=10)
207
        target = self._get_map(target_dict, maximum_size=10,
208
            chk_bytes=basis._store)
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
209
        basis_get = basis._store.get_record_stream
210
        def get_record_stream(keys, order, fulltext):
211
            if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
212
                self.fail("'aaa' pointer was followed %r" % keys)
213
            return basis_get(keys, order, fulltext)
214
        basis._store.get_record_stream = get_record_stream
3735.2.31 by Robert Collins
CHKMap.iter_changes
215
        result = sorted(list(target.iter_changes(basis)))
216
        for change in result:
217
            if change[0] == ('aaa',):
218
                self.fail("Found unexpected change: %s" % change)
219
220
    def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
221
        # Within a leaf there are no hash's to exclude keys, make sure multi
222
        # value leaf nodes are handled well.
223
        basis_dict = {('aaa',): 'foo bar',
224
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
225
        target_dict = {('aaa',): 'foo bar',
226
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
227
        changes = [
228
            (('aab',), 'common altered a', 'common altered b'),
229
            (('at',), None, 'foo bar t'),
230
            (('b',), 'foo bar b', None),
231
            ]
232
        basis = self._get_map(basis_dict)
233
        target = self._get_map(target_dict, chk_bytes=basis._store)
234
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
235
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
236
    def test_iteritems_empty(self):
237
        chk_bytes = self.get_chk_bytes()
238
        root_key = CHKMap.from_dict(chk_bytes, {})
239
        chkmap = CHKMap(chk_bytes, root_key)
240
        self.assertEqual([], list(chkmap.iteritems()))
241
242
    def test_iteritems_two_items(self):
243
        chk_bytes = self.get_chk_bytes()
244
        root_key = CHKMap.from_dict(chk_bytes,
245
            {"a":"content here", "b":"more content"})
246
        chkmap = CHKMap(chk_bytes, root_key)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
247
        self.assertEqual([(("a",), "content here"), (("b",), "more content")],
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
248
            sorted(list(chkmap.iteritems())))
249
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
250
    def test_iteritems_selected_one_of_two_items(self):
3735.2.24 by Robert Collins
test_chk_map tests all passing.
251
        chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
252
        self.assertEqual({("a",): "content here"},
253
            self.to_dict(chkmap, [("a",)]))
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
254
3735.2.43 by Robert Collins
Teach CHKMap how to iter items in 2-tuple keyspaces.
255
    def test_iteritems_keys_prefixed_by_2_width_nodes(self):
256
        chkmap = self._get_map(
257
            {("a","a"):"content here", ("a", "b",):"more content",
258
             ("b", ""): 'boring content'},
259
            maximum_size=10, key_width=2)
260
        self.assertEqual(
261
            {("a", "a"): "content here", ("a", "b"): 'more content'},
262
            self.to_dict(chkmap, [("a",)]))
263
264
    def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
265
        chkmap = self._get_map(
266
            {("a","a"):"content here", ("a", "b",):"more content",
267
             ("b", ""): 'boring content'}, key_width=2)
268
        self.assertEqual(
269
            {("a", "a"): "content here", ("a", "b"): 'more content'},
270
            self.to_dict(chkmap, [("a",)]))
271
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
272
    def test___len__empty(self):
273
        chkmap = self._get_map({})
274
        self.assertEqual(0, len(chkmap))
275
276
    def test___len__2(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
277
        chkmap = self._get_map({"foo":"bar", "gam":"quux"})
278
        self.assertEqual(2, len(chkmap))
279
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
280
    def test_max_size_100_bytes_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
281
        # When there is a 100 byte upper node limit, a tree is formed.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
282
        chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
283
        # We expect three nodes:
284
        # A root, with two children, and with two key prefixes - k1 to one, and
285
        # k2 to the other as our node splitting is only just being developed.
286
        # The maximum size should be embedded
287
        chkmap._ensure_root()
288
        self.assertEqual(100, chkmap._root_node.maximum_size)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
289
        self.assertEqual(1, chkmap._root_node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
290
        # There should be two child nodes, and prefix of 2(bytes):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
291
        self.assertEqual(2, len(chkmap._root_node._items))
292
        self.assertEqual("k", chkmap._root_node.unique_serialised_prefix())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
293
        # The actual nodes pointed at will change as serialisers change; so
294
        # here we test that the key prefix is correct; then load the nodes and
295
        # check they have the right pointed at key; whether they have the
296
        # pointed at value inline or not is also unrelated to this test so we
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
297
        # don't check that in detail - rather we just check the aggregate
298
        # value.
299
        nodes = sorted(chkmap._root_node._items.items())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
300
        ptr1 = nodes[0]
301
        ptr2 = nodes[1]
302
        self.assertEqual('k1', ptr1[0])
303
        self.assertEqual('k2', ptr2[0])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
304
        node1 = _deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1])
305
        self.assertIsInstance(node1, LeafNode)
306
        self.assertEqual(1, len(node1))
307
        self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
308
        node2 = _deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1])
309
        self.assertIsInstance(node2, LeafNode)
310
        self.assertEqual(1, len(node2))
311
        self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
312
        # Having checked we have a good structure, check that the content is
313
        # still accessible.
314
        self.assertEqual(2, len(chkmap))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
315
        self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
316
            self.to_dict(chkmap))
317
318
    def test_init_root_is_LeafNode_new(self):
319
        chk_bytes = self.get_chk_bytes()
320
        chkmap = CHKMap(chk_bytes, None)
321
        self.assertIsInstance(chkmap._root_node, LeafNode)
322
        self.assertEqual({}, self.to_dict(chkmap))
323
        self.assertEqual(0, len(chkmap))
324
325
    def test_init_and_save_new(self):
326
        chk_bytes = self.get_chk_bytes()
327
        chkmap = CHKMap(chk_bytes, None)
328
        key = chkmap._save()
329
        leaf_node = LeafNode()
330
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
331
332
    def test_map_first_item_new(self):
333
        chk_bytes = self.get_chk_bytes()
334
        chkmap = CHKMap(chk_bytes, None)
335
        chkmap.map(("foo,",), "bar")
336
        self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
337
        self.assertEqual(1, len(chkmap))
338
        key = chkmap._save()
339
        leaf_node = LeafNode()
340
        leaf_node.map(chk_bytes, ("foo,",), "bar")
341
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
342
343
    def test_unmap_last_item_root_is_leaf_new(self):
344
        chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
345
        chkmap.unmap(("k1"*50,))
346
        chkmap.unmap(("k2"*50,))
347
        self.assertEqual(0, len(chkmap))
348
        self.assertEqual({}, self.to_dict(chkmap))
349
        key = chkmap._save()
350
        leaf_node = LeafNode()
351
        self.assertEqual([key], leaf_node.serialise(chkmap._store))
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
352
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
353
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
354
class TestLeafNode(TestCaseWithStore):
355
356
    def test_current_size_empty(self):
357
        node = LeafNode()
358
        self.assertEqual(15, node._current_size())
359
360
    def test_current_size_size_changed(self):
361
        node = LeafNode()
362
        node.set_maximum_size(10)
363
        self.assertEqual(16, node._current_size())
364
365
    def test_current_size_width_changed(self):
366
        node = LeafNode()
367
        node._key_width = 10
368
        self.assertEqual(16, node._current_size())
369
370
    def test_current_size_items(self):
371
        node = LeafNode()
372
        base_size = node._current_size()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
373
        node.map(None, ("foo bar",), "baz")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
374
        self.assertEqual(base_size + 12, node._current_size())
375
376
    def test_deserialise_empty(self):
377
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
378
        self.assertEqual(0, len(node))
379
        self.assertEqual(10, node.maximum_size)
380
        self.assertEqual(("sha1:1234",), node.key())
381
382
    def test_deserialise_items(self):
383
        node = LeafNode.deserialise(
384
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
385
        self.assertEqual(2, len(node))
386
        self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
387
            sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
388
3735.2.25 by Robert Collins
CHKInventory core tests passing.
389
    def test_deserialise_item_with_null_width_1(self):
390
        node = LeafNode.deserialise(
391
            "chkleaf:\n0\n1\n2\nfoo\x00bar\x00baz\nquux\x00blarh\n",
392
            ("sha1:1234",))
393
        self.assertEqual(2, len(node))
394
        self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
395
            sorted(node.iteritems(None)))
396
397
    def test_deserialise_item_with_null_width_2(self):
398
        node = LeafNode.deserialise(
399
            "chkleaf:\n0\n2\n2\nfoo\x001\x00bar\x00baz\nquux\x00\x00blarh\n",
400
            ("sha1:1234",))
401
        self.assertEqual(2, len(node))
402
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
403
            sorted(node.iteritems(None)))
404
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
405
    def test_iteritems_selected_one_of_two_items(self):
406
        node = LeafNode.deserialise(
407
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
408
        self.assertEqual(2, len(node))
409
        self.assertEqual([(("quux",), "blarh")],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
410
            sorted(node.iteritems(None, [("quux",), ("qaz",)])))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
411
412
    def test_key_new(self):
413
        node = LeafNode()
414
        self.assertEqual(None, node.key())
415
416
    def test_key_after_map(self):
417
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
418
        node.map(None, ("foo bar",), "baz quux")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
419
        self.assertEqual(None, node.key())
420
421
    def test_key_after_unmap(self):
422
        node = LeafNode.deserialise(
423
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
3735.2.24 by Robert Collins
test_chk_map tests all passing.
424
        node.unmap(None, ("foo bar",))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
425
        self.assertEqual(None, node.key())
426
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
427
    def test_map_exceeding_max_size_only_entry_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
428
        node = LeafNode()
429
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
430
        result = node.map(None, ("foo bar",), "baz quux")
431
        self.assertEqual(("foo bar", [("", node)]), result)
432
        self.assertTrue(10 < node._current_size())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
433
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
434
    def test_map_exceeding_max_size_second_entry_early_difference_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
435
        node = LeafNode()
436
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
437
        node.map(None, ("foo bar",), "baz quux")
438
        prefix, result = list(node.map(None, ("blue",), "red"))
439
        self.assertEqual("", prefix)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
440
        self.assertEqual(2, len(result))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
441
        split_chars = set([result[0][0], result[1][0]])
442
        self.assertEqual(set(["f", "b"]), split_chars)
443
        nodes = dict(result)
444
        node = nodes["f"]
445
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
446
        self.assertEqual(10, node.maximum_size)
447
        self.assertEqual(1, node._key_width)
448
        node = nodes["b"]
449
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
450
        self.assertEqual(10, node.maximum_size)
451
        self.assertEqual(1, node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
452
453
    def test_map_first(self):
454
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
455
        result = node.map(None, ("foo bar",), "baz quux")
456
        self.assertEqual(("foo bar", [("", node)]), result)
457
        self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
458
        self.assertEqual(1, len(node))
459
460
    def test_map_second(self):
461
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
462
        node.map(None, ("foo bar",), "baz quux")
463
        result = node.map(None, ("bingo",), "bango")
464
        self.assertEqual(("", [("", node)]), result)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
465
        self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
3735.2.24 by Robert Collins
test_chk_map tests all passing.
466
            self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
467
        self.assertEqual(2, len(node))
468
469
    def test_map_replacement(self):
470
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
471
        node.map(None, ("foo bar",), "baz quux")
472
        result = node.map(None, ("foo bar",), "bango")
473
        self.assertEqual(("foo bar", [("", node)]), result)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
474
        self.assertEqual({("foo bar",): "bango"},
3735.2.24 by Robert Collins
test_chk_map tests all passing.
475
            self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
476
        self.assertEqual(1, len(node))
477
478
    def test_serialise_empty(self):
479
        store = self.get_chk_bytes()
480
        node = LeafNode()
481
        node.set_maximum_size(10)
482
        expected_key = ("sha1:62cc3565b48b0e830216e652cf99c6bd6b05b4b9",)
483
        self.assertEqual([expected_key],
484
            list(node.serialise(store)))
485
        self.assertEqual("chkleaf:\n10\n1\n0\n", self.read_bytes(store, expected_key))
486
        self.assertEqual(expected_key, node.key())
487
488
    def test_serialise_items(self):
489
        store = self.get_chk_bytes()
490
        node = LeafNode()
491
        node.set_maximum_size(10)
3735.2.24 by Robert Collins
test_chk_map tests all passing.
492
        node.map(None, ("foo bar",), "baz quux")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
493
        expected_key = ("sha1:d44cb6f0299b7e047da7f9e98f810e98f1dce1a7",)
494
        self.assertEqual([expected_key],
495
            list(node.serialise(store)))
496
        self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\x00baz quux\n",
497
            self.read_bytes(store, expected_key))
498
        self.assertEqual(expected_key, node.key())
499
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
500
    def test_unique_serialised_prefix_empty_new(self):
501
        node = LeafNode()
502
        self.assertEqual("", node.unique_serialised_prefix())
503
504
    def test_unique_serialised_prefix_one_item_new(self):
505
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
506
        node.map(None, ("foo bar", "baz"), "baz quux")
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
507
        self.assertEqual("foo bar\x00baz", node.unique_serialised_prefix())
508
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
509
    def test_unmap_missing(self):
510
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
511
        self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
512
513
    def test_unmap_present(self):
514
        node = LeafNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
515
        node.map(None, ("foo bar",), "baz quux")
516
        result = node.unmap(None, ("foo bar",))
517
        self.assertEqual(node, result)
518
        self.assertEqual({}, self.to_dict(node, None))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
519
        self.assertEqual(0, len(node))
520
521
522
class TestInternalNode(TestCaseWithStore):
523
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
524
    def test_add_node_empty_new(self):
525
        node = InternalNode()
526
        child = LeafNode()
527
        child.set_maximum_size(100)
528
        child.map(None, ("foo",), "bar")
529
        node.add_node("foo", child)
530
        # Note that node isn't strictly valid now as a tree (only one child),
531
        # but thats ok for this test.
532
        # The first child defines the node's width:
533
        self.assertEqual(3, node._node_width)
534
        # We should be able to iterate over the contents without doing IO.
535
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
536
        # The length should be known:
537
        self.assertEqual(1, len(node))
538
        # serialising the node should serialise the child and the node.
539
        chk_bytes = self.get_chk_bytes()
540
        keys = list(node.serialise(chk_bytes))
541
        child_key = child.serialise(chk_bytes)[0]
542
        self.assertEqual(
543
            [child_key, ('sha1:db23b260c2bf46bf7446c39f91668900a2491610',)],
544
            keys)
545
        # We should be able to access deserialised content.
546
        bytes = self.read_bytes(chk_bytes, keys[1])
547
        node = _deserialise(bytes, keys[1])
548
        self.assertEqual(1, len(node))
549
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
550
        self.assertEqual(3, node._node_width)
551
552
    def test_add_node_resets_key_new(self):
553
        node = InternalNode()
554
        child = LeafNode()
555
        child.set_maximum_size(100)
556
        child.map(None, ("foo",), "bar")
557
        node.add_node("foo", child)
558
        chk_bytes = self.get_chk_bytes()
559
        keys = list(node.serialise(chk_bytes))
560
        self.assertEqual(keys[1], node._key)
561
        node.add_node("fos", child)
562
        self.assertEqual(None, node._key)
563
564
#    def test_add_node_empty_oversized_one_ok_new(self):
565
#    def test_add_node_one_oversized_second_kept_minimum_fan(self):
566
#    def test_add_node_two_oversized_third_kept_minimum_fan(self):
567
#    def test_add_node_one_oversized_second_splits_errors(self):
568
569
    def test_iteritems_empty_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
570
        node = InternalNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
571
        self.assertEqual([], sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
572
573
    def test_iteritems_two_children(self):
574
        node = InternalNode()
575
        leaf1 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
576
        leaf1.map(None, ('foo bar',), 'quux')
577
        leaf2 = LeafNode()
578
        leaf2.map(None, ('strange',), 'beast')
3735.2.24 by Robert Collins
test_chk_map tests all passing.
579
        node.add_node("f", leaf1)
580
        node.add_node("s", leaf2)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
581
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
582
            sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
583
584
    def test_iteritems_two_children_partial(self):
585
        node = InternalNode()
3735.2.24 by Robert Collins
test_chk_map tests all passing.
586
        leaf1 = LeafNode()
587
        leaf1.map(None, ('foo bar',), 'quux')
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
588
        leaf2 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
589
        leaf2.map(None, ('strange',), 'beast')
3735.2.24 by Robert Collins
test_chk_map tests all passing.
590
        node.add_node("f", leaf1)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
591
        # This sets up a path that should not be followed - it will error if
592
        # the code tries to.
3735.2.24 by Robert Collins
test_chk_map tests all passing.
593
        node._items['f'] = None
594
        node.add_node("s", leaf2)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
595
        self.assertEqual([(('strange',), 'beast')],
3735.2.24 by Robert Collins
test_chk_map tests all passing.
596
            sorted(node.iteritems(None, [('strange',), ('weird',)])))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
597
598
    def test_iteritems_partial_empty(self):
599
        node = InternalNode()
600
        self.assertEqual([], sorted(node.iteritems([('missing',)])))
601
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
602
    def test_map_to_new_child_new(self):
603
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
604
        chkmap._ensure_root()
605
        node = chkmap._root_node
606
        # Ensure test validity: nothing paged in below the root.
607
        self.assertEqual(2,
608
            len([value for value in node._items.values()
609
                if type(value) == tuple]))
610
        # now, mapping to k3 should add a k3 leaf
611
        prefix, nodes = node.map(None, ('k3',), 'quux')
612
        self.assertEqual("k", prefix)
613
        self.assertEqual([("", node)], nodes)
614
        # check new child details
615
        child = node._items['k3']
616
        self.assertIsInstance(child, LeafNode)
617
        self.assertEqual(1, len(child))
618
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
619
        self.assertEqual(None, child._key)
620
        self.assertEqual(10, child.maximum_size)
621
        self.assertEqual(1, child._key_width)
622
        # Check overall structure:
623
        self.assertEqual(3, len(chkmap))
624
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
625
            self.to_dict(chkmap))
626
        # serialising should only serialise the new data - k3 and the internal
627
        # node.
628
        keys = list(node.serialise(chkmap._store))
629
        child_key = child.serialise(chkmap._store)[0]
630
        self.assertEqual([child_key, keys[1]], keys)
631
632
    def test_map_to_child_child_splits_new(self):
633
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
634
        # Check for the canonical root value for this tree:
635
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',),
636
            chkmap._root_node)
637
        chkmap._ensure_root()
638
        node = chkmap._root_node
639
        # Ensure test validity: nothing paged in below the root.
640
        self.assertEqual(2,
641
            len([value for value in node._items.values()
642
                if type(value) == tuple]))
643
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
644
        # k23, which for simplicity in the current implementation generates
645
        # a new internal node between node, and k22/k23.
646
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
647
        self.assertEqual("k", prefix)
648
        self.assertEqual([("", node)], nodes)
649
        # check new child details
650
        child = node._items['k2']
651
        self.assertIsInstance(child, InternalNode)
652
        self.assertEqual(2, len(child))
653
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
654
            self.to_dict(child, None))
655
        self.assertEqual(None, child._key)
656
        self.assertEqual(10, child.maximum_size)
657
        self.assertEqual(1, child._key_width)
658
        self.assertEqual(3, child._node_width)
659
        # Check overall structure:
660
        self.assertEqual(3, len(chkmap))
661
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
662
            self.to_dict(chkmap))
663
        # serialising should only serialise the new data - although k22 hasn't
664
        # changed because its a special corner case (splitting on with only one
665
        # key leaves one node unaltered), in general k22 is serialised, so we
666
        # expect k22, k23, the new internal node, and node, to be serialised.
667
        keys = list(node.serialise(chkmap._store))
668
        child_key = child._key
669
        k22_key = child._items['k22']._key
670
        k23_key = child._items['k23']._key
671
        self.assertEqual([k22_key, k23_key, child_key, keys[-1]], keys)
672
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
673
            keys[-1])
674
675
    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
676
        chkmap = self._get_map(
677
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
678
        # Check we have the expected tree.
679
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
680
            chkmap._root_node)
681
        chkmap._ensure_root()
682
        node = chkmap._root_node
683
        # unmapping k23 should give us a root, with k1 and k22 as direct
684
        # children.
685
        result = node.unmap(chkmap._store, ('k23',))
686
        # check the pointed-at object within node - k2 should now point at the
687
        # k22 leaf (which should not even have been paged in).
688
        ptr = node._items['k2']
689
        self.assertIsInstance(ptr, tuple)
690
        child = _deserialise(self.read_bytes(chkmap._store, ptr), ptr)
691
        self.assertIsInstance(child, LeafNode)
692
        self.assertEqual(1, len(child))
693
        self.assertEqual({('k22',): 'bar'},
694
            self.to_dict(child, None))
695
        # Check overall structure is instact:
696
        self.assertEqual(2, len(chkmap))
697
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
698
            self.to_dict(chkmap))
699
        # serialising should only serialise the new data - the root node.
700
        keys = list(node.serialise(chkmap._store))
701
        self.assertEqual([keys[-1]], keys)
702
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',), keys[-1])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
703
3735.2.23 by Robert Collins
Test unmapping with one child left but multiple keys.
704
    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
705
        chkmap = self._get_map(
706
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
707
        # Check we have the expected tree.
708
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
709
            chkmap._root_node)
710
        chkmap._ensure_root()
711
        node = chkmap._root_node
712
        k2_ptr = node._items['k2']
713
        # unmapping k21 should give us a root, with k22 and k23 as direct
714
        # children, and should not have needed to page in the subtree.
715
        result = node.unmap(chkmap._store, ('k1',))
716
        self.assertEqual(k2_ptr, result)
717
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
718
719
# leaf:
720
# map -> fits - done
721
# map -> doesn't fit - shrink from left till fits
722
#        key data to return: the common prefix, new nodes.
723
724
# unmap -> how to tell if siblings can be combined.
725
#          combing leaf nodes means expanding the prefix to the left; so gather the size of
726
#          all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
727
#          is an internal node, we know that that is a dense subtree - can't combine.
728
#          otherwise as soon as the sum of serialised values exceeds the split threshold
729
#          we know we can't combine - stop.
730
# unmap -> key return data - space in node, common prefix length? and key count
731
# internal: 
732
# variable length prefixes? -> later start with fixed width to get something going
733
# map -> fits - update pointer to leaf
734
#        return [prefix and node] - seems sound.
735
# map -> doesn't fit - find unique prefix and shift right
736
#        create internal nodes for all the partitions, return list of unique
737
#        prefixes and nodes.
738
# map -> new prefix - create a leaf
739
# unmap -> if child key count 0, remove
740
# unmap -> return space in node, common prefix length? (why?), key count
741
# map:
742
# map, if 1 node returned, use it, otherwise make an internal and populate.
743
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
744
# code)
745
# map inits as empty leafnode.
746
# tools: 
747
# visualiser
748
749
750
# how to handle:
751
# AA, AB, AC, AD, BA
752
# packed internal node - ideal:
753
# AA, AB, AC, AD, BA
754
# single byte fanout - A,B,   AA,AB,AC,AD,     BA
755
# build order's:
756
# BA
757
# AB - split, but we want to end up with AB, BA, in one node, with 
758
# 1-4K get0