/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests for maps built on a CHK versionedfiles facility."""
18
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
19
from bzrlib.chk_map import (
20
    CHKMap,
21
    InternalNode,
22
    LeafNode,
23
    RootNode,
24
    ValueNode,
25
    _deserialise,
26
    )
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
27
from bzrlib.tests import TestCaseWithTransport
28
29
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
30
class TestCaseWithStore(TestCaseWithTransport):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
31
32
    def get_chk_bytes(self):
33
        # The eassiest way to get a CHK store is a development3 repository and
34
        # then work with the chk_bytes attribute directly.
35
        repo = self.make_repository(".", format="development3")
36
        repo.lock_write()
37
        self.addCleanup(repo.unlock)
38
        repo.start_write_group()
39
        self.addCleanup(repo.abort_write_group)
40
        return repo.chk_bytes
41
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
42
    def _get_map(self, a_dict, maximum_size=0):
43
        chk_bytes = self.get_chk_bytes()
44
        root_key = CHKMap.from_dict(chk_bytes, a_dict, maximum_size=maximum_size)
45
        chkmap = CHKMap(chk_bytes, root_key)
46
        return chkmap
47
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
48
    def read_bytes(self, chk_bytes, key):
49
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
50
        return stream.next().get_bytes_as("fulltext")
51
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
52
    def to_dict(self, node, *args):
53
        return dict(node.iteritems(*args))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
54
55
56
class TestMap(TestCaseWithStore):
57
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
58
    def assertHasABMap(self, chk_bytes):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
59
        root_key = ('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
60
        self.assertEqual(
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
61
            "chkroot:\n0\n1\na\x00sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b\n",
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
62
            self.read_bytes(chk_bytes, root_key))
63
        self.assertEqual(
64
            "chkvalue:\nb",
65
            self.read_bytes(chk_bytes,
66
                ("sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b",)))
67
68
    def assertHasEmptyMap(self, chk_bytes):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
69
        root_key = ('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',)
70
        self.assertEqual("chkroot:\n0\n0\n", self.read_bytes(chk_bytes, root_key))
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
71
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
72
    def test_from_dict_empty(self):
73
        chk_bytes = self.get_chk_bytes()
74
        root_key = CHKMap.from_dict(chk_bytes, {})
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
75
        self.assertEqual(('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',),
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
76
            root_key)
77
        self.assertHasEmptyMap(chk_bytes)
78
79
    def test_from_dict_ab(self):
80
        chk_bytes = self.get_chk_bytes()
81
        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
82
        self.assertEqual(('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',),
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
83
            root_key)
84
        self.assertHasABMap(chk_bytes)
85
86
    def test_apply_empty_ab(self):
87
        # applying a delta (None, "a", "b") to an empty chkmap generates the
88
        # same map as from_dict_ab.
89
        chk_bytes = self.get_chk_bytes()
90
        root_key = CHKMap.from_dict(chk_bytes, {})
91
        chkmap = CHKMap(chk_bytes, root_key)
92
        new_root = chkmap.apply_delta([(None, "a", "b")])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
93
        self.assertEqual(('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',),
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
94
            new_root)
95
        self.assertHasABMap(chk_bytes)
96
        # The update should have left us with an in memory root node, with an
97
        # updated key.
98
        self.assertEqual(new_root, chkmap._root_node._key)
99
100
    def test_apply_ab_empty(self):
101
        # applying a delta ("a", None, None) to an empty chkmap generates the
102
        # same map as from_dict_ab.
103
        chk_bytes = self.get_chk_bytes()
104
        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
105
        chkmap = CHKMap(chk_bytes, root_key)
106
        new_root = chkmap.apply_delta([("a", None, None)])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
107
        self.assertEqual(('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',),
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
108
            new_root)
109
        self.assertHasEmptyMap(chk_bytes)
110
        # The update should have left us with an in memory root node, with an
111
        # updated key.
112
        self.assertEqual(new_root, chkmap._root_node._key)
113
114
    def test_iteritems_empty(self):
115
        chk_bytes = self.get_chk_bytes()
116
        root_key = CHKMap.from_dict(chk_bytes, {})
117
        chkmap = CHKMap(chk_bytes, root_key)
118
        self.assertEqual([], list(chkmap.iteritems()))
119
120
    def test_iteritems_two_items(self):
121
        chk_bytes = self.get_chk_bytes()
122
        root_key = CHKMap.from_dict(chk_bytes,
123
            {"a":"content here", "b":"more content"})
124
        chkmap = CHKMap(chk_bytes, root_key)
125
        self.assertEqual([("a", "content here"), ("b", "more content")],
126
            sorted(list(chkmap.iteritems())))
127
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
128
    def test_iteritems_selected_one_of_two_items(self):
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
129
        chkmap = self._get_map( {"a":"content here", "b":"more content"})
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
130
        self.assertEqual([("a", "content here")],
131
            sorted(list(chkmap.iteritems(["a"]))))
132
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
133
    def test___len__empty(self):
134
        chkmap = self._get_map({})
135
        self.assertEqual(0, len(chkmap))
136
137
    def test___len__2(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
138
        chkmap = self._get_map({"foo":"bar", "gam":"quux"})
139
        self.assertEqual(2, len(chkmap))
140
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
141
    def test_max_size_100_bytes_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
142
        # When there is a 100 byte upper node limit, a tree is formed.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
143
        chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
144
        # We expect three nodes:
145
        # A root, with two children, and with two key prefixes - k1 to one, and
146
        # k2 to the other as our node splitting is only just being developed.
147
        # The maximum size should be embedded
148
        chkmap._ensure_root()
149
        self.assertEqual(100, chkmap._root_node.maximum_size)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
150
        self.assertEqual(1, chkmap._root_node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
151
        # There should be two child nodes, and prefix of 2(bytes):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
152
        self.assertEqual(2, len(chkmap._root_node._items))
153
        self.assertEqual("k", chkmap._root_node.unique_serialised_prefix())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
154
        # The actual nodes pointed at will change as serialisers change; so
155
        # here we test that the key prefix is correct; then load the nodes and
156
        # check they have the right pointed at key; whether they have the
157
        # pointed at value inline or not is also unrelated to this test so we
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
158
        # don't check that in detail - rather we just check the aggregate
159
        # value.
160
        nodes = sorted(chkmap._root_node._items.items())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
161
        ptr1 = nodes[0]
162
        ptr2 = nodes[1]
163
        self.assertEqual('k1', ptr1[0])
164
        self.assertEqual('k2', ptr2[0])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
165
        node1 = _deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1])
166
        self.assertIsInstance(node1, LeafNode)
167
        self.assertEqual(1, len(node1))
168
        self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
169
        node2 = _deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1])
170
        self.assertIsInstance(node2, LeafNode)
171
        self.assertEqual(1, len(node2))
172
        self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
173
        # Having checked we have a good structure, check that the content is
174
        # still accessible.
175
        self.assertEqual(2, len(chkmap))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
176
        self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
177
            self.to_dict(chkmap))
178
179
    def test_init_root_is_LeafNode_new(self):
180
        chk_bytes = self.get_chk_bytes()
181
        chkmap = CHKMap(chk_bytes, None)
182
        self.assertIsInstance(chkmap._root_node, LeafNode)
183
        self.assertEqual({}, self.to_dict(chkmap))
184
        self.assertEqual(0, len(chkmap))
185
186
    def test_init_and_save_new(self):
187
        chk_bytes = self.get_chk_bytes()
188
        chkmap = CHKMap(chk_bytes, None)
189
        key = chkmap._save()
190
        leaf_node = LeafNode()
191
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
192
193
    def test_map_first_item_new(self):
194
        chk_bytes = self.get_chk_bytes()
195
        chkmap = CHKMap(chk_bytes, None)
196
        chkmap.map(("foo,",), "bar")
197
        self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
198
        self.assertEqual(1, len(chkmap))
199
        key = chkmap._save()
200
        leaf_node = LeafNode()
201
        leaf_node.map(chk_bytes, ("foo,",), "bar")
202
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
203
204
    def test_unmap_last_item_root_is_leaf_new(self):
205
        chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
206
        chkmap.unmap(("k1"*50,))
207
        chkmap.unmap(("k2"*50,))
208
        self.assertEqual(0, len(chkmap))
209
        self.assertEqual({}, self.to_dict(chkmap))
210
        key = chkmap._save()
211
        leaf_node = LeafNode()
212
        self.assertEqual([key], leaf_node.serialise(chkmap._store))
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
213
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
214
215
class TestRootNode(TestCaseWithTransport):
216
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
217
    def test__current_size(self):
218
        node = RootNode()
219
        self.assertEqual(15, node._current_size())
220
        node.add_child("cd", ("sha1:12345",))
221
        self.assertEqual(29, node._current_size())
222
        self.assertEqual(29, len(node.serialise()))
223
        node.add_child("cd", ("sha1:123456",))
224
        self.assertEqual(30, node._current_size())
225
        self.assertEqual(30, len(node.serialise()))
226
        node.remove_child("cd")
227
        self.assertEqual(15, node._current_size())
228
        self.assertEqual(15, len(node.serialise()))
229
        node.set_maximum_size(100)
230
        self.assertEqual(17, node._current_size())
231
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
232
    def test_serialise_empty(self):
233
        node = RootNode()
234
        bytes = node.serialise()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
235
        self.assertEqual("chkroot:\n0\n0\n0\n", bytes)
236
237
    def test_add_child_over_limit(self):
238
        node = RootNode()
239
        node.set_maximum_size(20)
240
        node.add_child("abcdef", ("sha1:12345",))
241
        size = node._current_size()
242
        self.assertTrue(20 < size)
243
        self.assertEqual(False, node.add_child("12345", ("sha1:34",)))
244
        # Nothing should have changed
245
        self.assertEqual(size, node._current_size())
246
        self.assertEqual(1, len(node))
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
247
248
    def test_add_child_resets_key(self):
249
        node = RootNode()
250
        node._key = ("something",)
251
        node.add_child("c", ("sha1:1234",))
252
        self.assertEqual(None, node._key)
253
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
254
    def test_add_child_returns_True(self):
255
        node = RootNode()
256
        node._key = ("something",)
257
        self.assertEqual(True, node.add_child("c", ("sha1:1234",)))
258
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
259
    def test_add_child_increases_len(self):
260
        node = RootNode()
261
        node._key = ("something",)
262
        node.add_child("c", ("sha1:1234",))
263
        self.assertEqual(1, len(node))
264
265
    def test_remove_child_decreases_len(self):
266
        node = RootNode()
267
        node.add_child("c", ("sha1:1234",))
268
        node._key = ("something",)
269
        node.remove_child("c")
270
        self.assertEqual(0, len(node))
271
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
272
    def test_remove_child_removes_child(self):
273
        node = RootNode()
274
        node.add_child("a", ("sha1:4321",))
275
        node.add_child("c", ("sha1:1234",))
276
        node._key = ("something",)
277
        node.remove_child("a")
278
        self.assertEqual({"c":("sha1:1234",)}, node._nodes)
279
280
    def test_remove_child_resets_key(self):
281
        node = RootNode()
282
        node.add_child("c", ("sha1:1234",))
283
        node._key = ("something",)
284
        node.remove_child("c")
285
        self.assertEqual(None, node._key)
286
287
    def test_deserialise(self):
288
        # deserialising from a bytestring & key sets the nodes and the known
289
        # key.
290
        node = RootNode()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
291
        node.deserialise("chkroot:\n0\n0\n1\nc\x00sha1:1234\n", ("foo",))
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
292
        self.assertEqual({"c": ("sha1:1234",)}, node._nodes)
293
        self.assertEqual(("foo",), node._key)
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
294
        self.assertEqual(1, len(node))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
295
        self.assertEqual(0, node.maximum_size)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
296
297
    def test_serialise_with_child(self):
298
        node = RootNode()
299
        node.add_child("c", ("sha1:1234",))
300
        bytes = node.serialise()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
301
        # type 0-max-length 1-value key\x00CHK
302
        self.assertEqual("chkroot:\n0\n0\n1\nc\x00sha1:1234\n", bytes)
303
304
    def test_deserialise_max_size(self):
305
        node = RootNode()
306
        node.deserialise("chkroot:\n100\n0\n1\nc\x00sha1:1234\n", ("foo",))
307
        self.assertEqual(100, node.maximum_size)
308
309
    def test_deserialise_key_prefix(self):
310
        node = RootNode()
311
        node.deserialise("chkroot:\n100\n10\n1\nc\x00sha1:1234\n", ("foo",))
312
        self.assertEqual(10, node.prefix_width)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
313
314
315
class TestValueNode(TestCaseWithTransport):
316
317
    def test_deserialise(self):
318
        node = ValueNode.deserialise("chkvalue:\nfoo bar baz\n")
319
        self.assertEqual("foo bar baz\n", node.value)
320
321
    def test_serialise(self):
322
        node = ValueNode("b")
323
        bytes = node.serialise()
324
        self.assertEqual("chkvalue:\nb", bytes)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
325
326
327
class TestLeafNode(TestCaseWithStore):
328
329
    def test_current_size_empty(self):
330
        node = LeafNode()
331
        self.assertEqual(15, node._current_size())
332
333
    def test_current_size_size_changed(self):
334
        node = LeafNode()
335
        node.set_maximum_size(10)
336
        self.assertEqual(16, node._current_size())
337
338
    def test_current_size_width_changed(self):
339
        node = LeafNode()
340
        node._key_width = 10
341
        self.assertEqual(16, node._current_size())
342
343
    def test_current_size_items(self):
344
        node = LeafNode()
345
        base_size = node._current_size()
346
        node = node.map(("foo bar",), "baz")
347
        self.assertEqual(base_size + 12, node._current_size())
348
349
    def test_deserialise_empty(self):
350
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
351
        self.assertEqual(0, len(node))
352
        self.assertEqual(10, node.maximum_size)
353
        self.assertEqual(("sha1:1234",), node.key())
354
355
    def test_deserialise_items(self):
356
        node = LeafNode.deserialise(
357
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
358
        self.assertEqual(2, len(node))
359
        self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
360
            sorted(node.iteritems()))
361
362
    def test_iteritems_selected_one_of_two_items(self):
363
        node = LeafNode.deserialise(
364
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
365
        self.assertEqual(2, len(node))
366
        self.assertEqual([(("quux",), "blarh")],
367
            sorted(node.iteritems([("quux",), ("qaz",)])))
368
369
    def test_key_new(self):
370
        node = LeafNode()
371
        self.assertEqual(None, node.key())
372
373
    def test_key_after_map(self):
374
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
375
        node = node.map(("foo bar",), "baz quux")
376
        self.assertEqual(None, node.key())
377
378
    def test_key_after_unmap(self):
379
        node = LeafNode.deserialise(
380
            "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
381
        node = node.unmap(("foo bar",))
382
        self.assertEqual(None, node.key())
383
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
384
    def test_map_exceeding_max_size_only_entry_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
385
        node = LeafNode()
386
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
387
        result = node.map(None, ("foo bar",), "baz quux")
388
        self.assertEqual(("foo bar", [("", node)]), result)
389
        self.assertTrue(10 < node._current_size())
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
390
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
391
    def test_map_exceeding_max_size_second_entry_early_difference_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
392
        node = LeafNode()
393
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
394
        node.map(None, ("foo bar",), "baz quux")
395
        prefix, result = list(node.map(None, ("blue",), "red"))
396
        self.assertEqual("", prefix)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
397
        self.assertEqual(2, len(result))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
398
        split_chars = set([result[0][0], result[1][0]])
399
        self.assertEqual(set(["f", "b"]), split_chars)
400
        nodes = dict(result)
401
        node = nodes["f"]
402
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
403
        self.assertEqual(10, node.maximum_size)
404
        self.assertEqual(1, node._key_width)
405
        node = nodes["b"]
406
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
407
        self.assertEqual(10, node.maximum_size)
408
        self.assertEqual(1, node._key_width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
409
410
    def test_map_exceeding_max_size_second_entry_last_octect_changed(self):
411
        node = LeafNode()
412
        node.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
413
        node = node.map(None, ("foo bar",), "baz quux")
414
        result = node.map(None, ("foo baz",), "red")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
415
        self.assertIsInstance(result, InternalNode)
416
        # should have copied the data in:
417
        self.assertEqual(2, len(result))
418
        self.assertEqual({('foo baz',): 'red', ('foo bar',): 'baz quux'},
419
            self.to_dict(result))
420
        self.assertEqual(10, result.maximum_size)
421
        self.assertEqual(1, result._key_width)
422
423
    def test_map_first(self):
424
        node = LeafNode()
425
        result = node.map(("foo bar",), "baz quux")
426
        self.assertEqual(result, node)
427
        self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node))
428
        self.assertEqual(1, len(node))
429
430
    def test_map_second(self):
431
        node = LeafNode()
432
        node = node.map(("foo bar",), "baz quux")
433
        result = node.map(("bingo",), "bango")
434
        self.assertEqual(result, node)
435
        self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
436
            self.to_dict(node))
437
        self.assertEqual(2, len(node))
438
439
    def test_map_replacement(self):
440
        node = LeafNode()
441
        node = node.map(("foo bar",), "baz quux")
442
        result = node.map(("foo bar",), "bango")
443
        self.assertEqual(result, node)
444
        self.assertEqual({("foo bar",): "bango"},
445
            self.to_dict(node))
446
        self.assertEqual(1, len(node))
447
448
    def test_serialise_empty(self):
449
        store = self.get_chk_bytes()
450
        node = LeafNode()
451
        node.set_maximum_size(10)
452
        expected_key = ("sha1:62cc3565b48b0e830216e652cf99c6bd6b05b4b9",)
453
        self.assertEqual([expected_key],
454
            list(node.serialise(store)))
455
        self.assertEqual("chkleaf:\n10\n1\n0\n", self.read_bytes(store, expected_key))
456
        self.assertEqual(expected_key, node.key())
457
458
    def test_serialise_items(self):
459
        store = self.get_chk_bytes()
460
        node = LeafNode()
461
        node.set_maximum_size(10)
462
        node = node.map(("foo bar",), "baz quux")
463
        expected_key = ("sha1:d44cb6f0299b7e047da7f9e98f810e98f1dce1a7",)
464
        self.assertEqual([expected_key],
465
            list(node.serialise(store)))
466
        self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\x00baz quux\n",
467
            self.read_bytes(store, expected_key))
468
        self.assertEqual(expected_key, node.key())
469
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
470
    def test_unique_serialised_prefix_empty_new(self):
471
        node = LeafNode()
472
        self.assertEqual("", node.unique_serialised_prefix())
473
        return
474
475
    def test_unique_serialised_prefix_one_item_new(self):
476
        node = LeafNode()
477
        result = node.map(None, ("foo bar", "baz"), "baz quux")
478
        self.assertEqual("foo bar\x00baz", node.unique_serialised_prefix())
479
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
480
    def test_unmap_missing(self):
481
        node = LeafNode()
482
        self.assertRaises(KeyError, node.unmap, ("foo bar",))
483
484
    def test_unmap_present(self):
485
        node = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
486
        node = node.map(None, ("foo bar",), "baz quux")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
487
        result = node.unmap(("foo bar",))
488
        self.assertEqual(result, node)
489
        self.assertEqual({}, self.to_dict(node))
490
        self.assertEqual(0, len(node))
491
492
493
class TestInternalNode(TestCaseWithStore):
494
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
495
    def test_add_node_empty_new(self):
496
        node = InternalNode()
497
        child = LeafNode()
498
        child.set_maximum_size(100)
499
        child.map(None, ("foo",), "bar")
500
        node.add_node("foo", child)
501
        # Note that node isn't strictly valid now as a tree (only one child),
502
        # but thats ok for this test.
503
        # The first child defines the node's width:
504
        self.assertEqual(3, node._node_width)
505
        # We should be able to iterate over the contents without doing IO.
506
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
507
        # The length should be known:
508
        self.assertEqual(1, len(node))
509
        # serialising the node should serialise the child and the node.
510
        chk_bytes = self.get_chk_bytes()
511
        keys = list(node.serialise(chk_bytes))
512
        child_key = child.serialise(chk_bytes)[0]
513
        self.assertEqual(
514
            [child_key, ('sha1:db23b260c2bf46bf7446c39f91668900a2491610',)],
515
            keys)
516
        # We should be able to access deserialised content.
517
        bytes = self.read_bytes(chk_bytes, keys[1])
518
        node = _deserialise(bytes, keys[1])
519
        self.assertEqual(1, len(node))
520
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
521
        self.assertEqual(3, node._node_width)
522
523
    def test_add_node_resets_key_new(self):
524
        node = InternalNode()
525
        child = LeafNode()
526
        child.set_maximum_size(100)
527
        child.map(None, ("foo",), "bar")
528
        node.add_node("foo", child)
529
        chk_bytes = self.get_chk_bytes()
530
        keys = list(node.serialise(chk_bytes))
531
        self.assertEqual(keys[1], node._key)
532
        node.add_node("fos", child)
533
        self.assertEqual(None, node._key)
534
535
#    def test_add_node_empty_oversized_one_ok_new(self):
536
#    def test_add_node_one_oversized_second_kept_minimum_fan(self):
537
#    def test_add_node_two_oversized_third_kept_minimum_fan(self):
538
#    def test_add_node_one_oversized_second_splits_errors(self):
539
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
540
    def test_add_node_empty_oversized_no_common_sets_prefix(self):
541
        # adding a node with two children that is oversized will generate two
542
        # new leaf nodes, and a prefix width that cuts one byte off the longest
543
        # key (because that is sufficient to guarantee a split
544
        overpacked = LeafNode()
545
        overpacked.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
546
        overpacked.map(None, ("foo bar",), "baz")
547
        overpacked.map(None, ("strange thing",), "it is")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
548
        # at this point, map returned a new internal node that is already
549
        # packed, but that should have preserved the old node due to the 
550
        # functional idioms.. check to be sure:
551
        self.assertTrue(overpacked.maximum_size < overpacked._current_size())
552
        node = InternalNode()
553
        # We're not testing that the internal node rebalances yet
554
        node.set_maximum_size(0)
555
        node._add_node(overpacked)
556
        # 13 is the length of strange_thing serialised; as there is no node size
557
        # set, we pack the internal node as densely as possible.
558
        self.assertEqual(13, node._node_width)
559
        self.assertEqual(set(["strange thing", "foo bar\x00\x00\x00\x00\x00\x00"]),
560
            set(node._items.keys()))
561
        self.assertEqual(2, len(node))
562
        self.assertEqual({('strange thing',): 'it is'},
563
            self.to_dict(node._items["strange thing"]))
564
        self.assertEqual({('foo bar',): 'baz'},
565
            self.to_dict(node._items["foo bar\x00\x00\x00\x00\x00\x00"]))
566
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
567
    def test_iteritems_empty_new(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
568
        node = InternalNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
569
        self.assertEqual([], sorted(node.iteritems(None)))
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
570
571
    def test_iteritems_two_children(self):
572
        node = InternalNode()
573
        leaf1 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
574
        leaf1.map(None, ('foo bar',), 'quux')
575
        leaf2 = LeafNode()
576
        leaf2 = LeafNode()
577
        leaf2.map(None, ('strange',), 'beast')
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
578
        node._items['foo ba'] = leaf1
579
        node._items['strang'] = leaf2
580
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
581
            sorted(node.iteritems()))
582
583
    def test_iteritems_two_children_partial(self):
584
        node = InternalNode()
585
        leaf2 = LeafNode()
586
        leaf2 = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
587
        leaf2.map(None, ('strange',), 'beast')
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
588
        # This sets up a path that should not be followed - it will error if
589
        # the code tries to.
590
        node._items['foo ba'] = None
591
        node._items['strang'] = leaf2
592
        node._node_width = 6
593
        self.assertEqual([(('strange',), 'beast')],
594
            sorted(node.iteritems([('strange',), ('weird',)])))
595
596
    def test_iteritems_partial_empty(self):
597
        node = InternalNode()
598
        self.assertEqual([], sorted(node.iteritems([('missing',)])))
599
600
    def test_map_to_existing_child(self):
601
        # mapping a new key which is in a child of an internal node maps
602
        # recursively.
603
        overpacked = LeafNode()
604
        overpacked.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
605
        overpacked.map(None, ("foo bar",), "baz")
606
        node = overpacked.map(None, ("foo baz",), "it is")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
607
        self.assertIsInstance(node, InternalNode)
608
        # Now, increase the maximum size limit on the subnode for foo bar
609
        child = node._items[node._serialised_key(("foo bar",))]
610
        child.set_maximum_size(200)
611
        # And map a new key into node, which will land in the same child node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
612
        result = node.map(None, ("foo bar baz",), "new value")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
613
        self.assertTrue(result is node)
614
        self.assertEqual(3, len(result))
615
        self.assertEqual(2, len(child))
616
        self.assertEqual({('foo bar',): 'baz',
617
            ('foo bar baz',): 'new value', ('foo baz',): 'it is'},
618
            self.to_dict(node))
619
620
    def test_map_to_existing_child_exceed_child_size_not_internal_size(self):
621
        # mapping a new key which is in a child of an internal node maps
622
        # recursively, and when the child splits that is accomodated within the
623
        # internal node if there is room for another child pointer.
624
        overpacked = LeafNode()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
625
        # 3 pointers, 7 bytes offset, 45 byte pointers, + prelude.
626
        overpacked.set_maximum_size(180)
627
        overpacked.map(None, ("foo bar",), "baz " * 40)
628
        node = overpacked.map(None, ("foo baz",), "itis" * 40)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
629
        self.assertIsInstance(node, InternalNode)
630
        # And map a new key into node, which will land in the same child path
631
        # within node, but trigger a spill event on the child, and should end
632
        # up with 3 pointers in node (as the pointers can fit in the node
633
        # space.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
634
        result = node.map(None, ("foo bar baz",), "new " * 60)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
635
        self.assertTrue(result is node)
636
        self.assertEqual(3, len(result))
637
        # We should have one child for foo bar
638
        child = node._items[node._serialised_key(("foo bar\x00",))]
639
        self.assertIsInstance(child, LeafNode)
640
        self.assertEqual(1, len(child))
641
        # And one for 'foo bar '
642
        child = node._items[node._serialised_key(("foo bar ",))]
643
        self.assertIsInstance(child, LeafNode)
644
        self.assertEqual(1, len(child))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
645
        self.assertEqual({('foo bar',): 'baz ' * 60,
646
            ('foo bar baz',): 'new ' * 60,
647
            ('foo baz',): 'itis' * 60},
648
            self.to_dict(node))
649
650
    def test_map_to_new_child_new(self):
651
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
652
        chkmap._ensure_root()
653
        node = chkmap._root_node
654
        # Ensure test validity: nothing paged in below the root.
655
        self.assertEqual(2,
656
            len([value for value in node._items.values()
657
                if type(value) == tuple]))
658
        # now, mapping to k3 should add a k3 leaf
659
        prefix, nodes = node.map(None, ('k3',), 'quux')
660
        self.assertEqual("k", prefix)
661
        self.assertEqual([("", node)], nodes)
662
        # check new child details
663
        child = node._items['k3']
664
        self.assertIsInstance(child, LeafNode)
665
        self.assertEqual(1, len(child))
666
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
667
        self.assertEqual(None, child._key)
668
        self.assertEqual(10, child.maximum_size)
669
        self.assertEqual(1, child._key_width)
670
        # Check overall structure:
671
        self.assertEqual(3, len(chkmap))
672
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
673
            self.to_dict(chkmap))
674
        # serialising should only serialise the new data - k3 and the internal
675
        # node.
676
        keys = list(node.serialise(chkmap._store))
677
        child_key = child.serialise(chkmap._store)[0]
678
        self.assertEqual([child_key, keys[1]], keys)
679
680
    def test_map_to_child_child_splits_new(self):
681
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
682
        # Check for the canonical root value for this tree:
683
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',),
684
            chkmap._root_node)
685
        chkmap._ensure_root()
686
        node = chkmap._root_node
687
        # Ensure test validity: nothing paged in below the root.
688
        self.assertEqual(2,
689
            len([value for value in node._items.values()
690
                if type(value) == tuple]))
691
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
692
        # k23, which for simplicity in the current implementation generates
693
        # a new internal node between node, and k22/k23.
694
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
695
        self.assertEqual("k", prefix)
696
        self.assertEqual([("", node)], nodes)
697
        # check new child details
698
        child = node._items['k2']
699
        self.assertIsInstance(child, InternalNode)
700
        self.assertEqual(2, len(child))
701
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
702
            self.to_dict(child, None))
703
        self.assertEqual(None, child._key)
704
        self.assertEqual(10, child.maximum_size)
705
        self.assertEqual(1, child._key_width)
706
        self.assertEqual(3, child._node_width)
707
        # Check overall structure:
708
        self.assertEqual(3, len(chkmap))
709
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
710
            self.to_dict(chkmap))
711
        # serialising should only serialise the new data - although k22 hasn't
712
        # changed because its a special corner case (splitting on with only one
713
        # key leaves one node unaltered), in general k22 is serialised, so we
714
        # expect k22, k23, the new internal node, and node, to be serialised.
715
        keys = list(node.serialise(chkmap._store))
716
        child_key = child._key
717
        k22_key = child._items['k22']._key
718
        k23_key = child._items['k23']._key
719
        self.assertEqual([k22_key, k23_key, child_key, keys[-1]], keys)
720
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
721
            keys[-1])
722
723
    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
724
        chkmap = self._get_map(
725
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
726
        # Check we have the expected tree.
727
        self.assertEqual(('sha1:d68cd97c95e847d3dc58c05537aa5fdcdf2cf5da',),
728
            chkmap._root_node)
729
        chkmap._ensure_root()
730
        node = chkmap._root_node
731
        # unmapping k23 should give us a root, with k1 and k22 as direct
732
        # children.
733
        result = node.unmap(chkmap._store, ('k23',))
734
        # check the pointed-at object within node - k2 should now point at the
735
        # k22 leaf (which should not even have been paged in).
736
        ptr = node._items['k2']
737
        self.assertIsInstance(ptr, tuple)
738
        child = _deserialise(self.read_bytes(chkmap._store, ptr), ptr)
739
        self.assertIsInstance(child, LeafNode)
740
        self.assertEqual(1, len(child))
741
        self.assertEqual({('k22',): 'bar'},
742
            self.to_dict(child, None))
743
        # Check overall structure is instact:
744
        self.assertEqual(2, len(chkmap))
745
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
746
            self.to_dict(chkmap))
747
        # serialising should only serialise the new data - the root node.
748
        keys = list(node.serialise(chkmap._store))
749
        self.assertEqual([keys[-1]], keys)
750
        self.assertEqual(('sha1:d3f06fc03d8f50845894d8d04cc5a3f47e62948d',), keys[-1])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
751
752
    def test_unmap_second_last_shrinks_to_other_branch(self):
753
        # unmapping the second last child of an internal node downgrades it to
754
        # a leaf node.
755
        overpacked = LeafNode()
756
        overpacked.set_maximum_size(10)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
757
        overpacked.map(None, ("foo bar",), "baz")
758
        node = overpacked.map(None, ("strange thing",), "it is")
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
759
        self.assertIsInstance(node, InternalNode)
760
        result = node.unmap(("foo bar",))
761
        self.assertIsInstance(result, LeafNode)
762
        self.assertEqual({("strange thing",): "it is"}, self.to_dict(result))
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
763
764
765
# leaf:
766
# map -> fits - done
767
# map -> doesn't fit - shrink from left till fits
768
#        key data to return: the common prefix, new nodes.
769
770
# unmap -> how to tell if siblings can be combined.
771
#          combing leaf nodes means expanding the prefix to the left; so gather the size of
772
#          all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
773
#          is an internal node, we know that that is a dense subtree - can't combine.
774
#          otherwise as soon as the sum of serialised values exceeds the split threshold
775
#          we know we can't combine - stop.
776
# unmap -> key return data - space in node, common prefix length? and key count
777
# internal: 
778
# variable length prefixes? -> later start with fixed width to get something going
779
# map -> fits - update pointer to leaf
780
#        return [prefix and node] - seems sound.
781
# map -> doesn't fit - find unique prefix and shift right
782
#        create internal nodes for all the partitions, return list of unique
783
#        prefixes and nodes.
784
# map -> new prefix - create a leaf
785
# unmap -> if child key count 0, remove
786
# unmap -> return space in node, common prefix length? (why?), key count
787
# map:
788
# map, if 1 node returned, use it, otherwise make an internal and populate.
789
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
790
# code)
791
# map inits as empty leafnode.
792
# tools: 
793
# visualiser
794
795
796
# how to handle:
797
# AA, AB, AC, AD, BA
798
# packed internal node - ideal:
799
# AA, AB, AC, AD, BA
800
# single byte fanout - A,B,   AA,AB,AC,AD,     BA
801
# build order's:
802
# BA
803
# AB - split, but we want to end up with AB, BA, in one node, with 
804
# 1-4K get0