/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
17
"""Persistent maps from tuple_of_strings->string using CHK stores.
18
19
Overview and current status:
20
21
The CHKMap class implements a dict from tuple_of_strings->string by using a trie
22
with internal nodes of 8-bit fan out; The key tuples are mapped to strings by
23
joining them by \x00, and \x00 padding shorter keys out to the length of the
24
longest key. Leaf nodes are packed as densely as possible, and internal nodes
25
are all and additional 8-bits wide leading to a sparse upper tree. 
26
27
Updates to a CHKMap are done preferentially via the apply_delta method, to
28
allow optimisation of the update operation; but individual map/unmap calls are
29
possible and supported. All changes via map/unmap are buffered in memory until
30
the _save method is called to force serialisation of the tree. apply_delta
31
performs a _save implicitly.
32
33
TODO:
34
-----
35
36
Densely packed upper nodes.
37
38
"""
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
39
40
import osutils
41
42
43
class CHKMap(object):
44
    """A persistent map from string to string backed by a CHK store."""
45
46
    def __init__(self, store, root_key):
47
        """Create a CHKMap object.
48
49
        :param store: The store the CHKMap is stored in.
50
        :param root_key: The root key of the map. None to create an empty
51
            CHKMap.
52
        """
53
        self._store = store
54
        if root_key is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
55
            self._root_node = LeafNode()
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
56
        else:
57
            self._root_node = root_key
58
59
    def apply_delta(self, delta):
60
        """Apply a delta to the map.
61
62
        :param delta: An iterable of old_key, new_key, new_value tuples.
63
            If new_key is not None, then new_key->new_value is inserted
64
            into the map; if old_key is not None, then the old mapping
65
            of old_key is removed.
66
        """
67
        for old, new, value in delta:
68
            if old is not None and old != new:
69
                # unmap
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
70
                self.unmap(old)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
71
        for old, new, value in delta:
72
            if new is not None:
73
                # map
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
74
                self.map(new, value)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
75
        return self._save()
76
77
    def _ensure_root(self):
78
        """Ensure that the root node is an object not a key."""
79
        if type(self._root_node) == tuple:
80
            # Demand-load the root
81
            bytes = self._read_bytes(self._root_node)
82
            root_key = self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
83
            self._root_node = _deserialise(bytes, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
84
85
    def _read_bytes(self, key):
86
        stream = self._store.get_record_stream([key], 'unordered', True)
87
        return stream.next().get_bytes_as('fulltext')
88
89
    @classmethod
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
90
    def from_dict(klass, store, initial_value, maximum_size=0):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
91
        """Create a CHKMap in store with initial_value as the content.
92
        
93
        :param store: The store to record initial_value in, a VersionedFiles
94
            object with 1-tuple keys supporting CHK key generation.
95
        :param initial_value: A dict to store in store. Its keys and values
96
            must be bytestrings.
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
97
        :param maximum_size: The maximum_size rule to apply to nodes. This
98
            determines the size at which no new data is added to a single node.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
99
        :return: The root chk of te resulting CHKMap.
100
        """
101
        result = CHKMap(store, None)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
102
        result._root_node.set_maximum_size(maximum_size)
103
        delta = []
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
104
        for key, value in initial_value.items():
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
105
            delta.append((None, key, value))
106
        result.apply_delta(delta)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
107
        return result._save()
108
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
109
    def iteritems(self, key_filter=None):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
110
        """Iterate over the entire CHKMap's contents."""
111
        self._ensure_root()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
112
        return self._root_node.iteritems(self._store, key_filter=key_filter)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
113
3735.2.12 by Robert Collins
Implement commit-via-deltas for split inventory repositories.
114
    def key(self):
115
        """Return the key for this map."""
116
        if isinstance(self._root_node, tuple):
117
            return self._root_node
118
        else:
119
            return self._root_node._key
120
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
121
    def __len__(self):
122
        self._ensure_root()
123
        return len(self._root_node)
124
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
125
    def map(self, key, value):
126
        """Map a key tuple to value."""
127
        # Need a root object.
128
        self._ensure_root()
129
        prefix, node_details = self._root_node.map(self._store, key, value)
130
        if len(node_details) == 1:
131
            self._root_node = node_details[0][1]
132
        else:
133
            self._root_node = InternalNode()
134
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
135
            self._root_node._key_width = node_details[0][1]._key_width
136
            for split, node in node_details:
137
                self._root_node.add_node(split, node)
138
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
139
    def _map(self, key, value):
140
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
141
        # Ne
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
142
        self._ensure_root()
143
        # Store the value
144
        bytes = ValueNode(value).serialise()
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
145
        # Experimental code to probe for keys rather than just adding; its not
146
        # clear if it is an improvement.
147
        #chk = ("sha1:%s" % osutils.sha_string(bytes),)
148
        #if not self._store.get_parent_map([key]):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
149
        sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes))
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
150
        chk = ("sha1:" + sha1,)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
151
        # And link into the root
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
152
        self._root_node.add_child(key, chk)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
153
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
154
    def unmap(self, key):
155
        """remove key from the map."""
156
        self._ensure_root()
157
        self._root_node.unmap(self._store, key)
158
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
159
    def _unmap(self, key):
160
        """remove key from the map."""
161
        self._ensure_root()
162
        self._root_node.remove_child(key)
163
164
    def _save(self):
165
        """Save the map completely.
166
167
        :return: The key of the root node.
168
        """
169
        if type(self._root_node) == tuple:
170
            # Already saved.
171
            return self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
172
        keys = list(self._root_node.serialise(self._store))
173
        return keys[-1]
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
174
175
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
176
class Node(object):
177
    """Base class defining the protocol for CHK Map nodes."""
178
179
    def __init__(self, key_width=1):
180
        """Create a node.
181
182
        :param key_width: The width of keys for this node.
183
        """
184
        self._key = None
185
        # Current number of elements
186
        self._len = 0
187
        self._maximum_size = 0
188
        self._key_width = 1
189
        # current size in bytes
190
        self._size = 0
191
        # The pointers/values this node has - meaning defined by child classes.
192
        self._items = {}
193
194
    def __len__(self):
195
        return self._len
196
197
    @property
198
    def maximum_size(self):
199
        """What is the upper limit for adding references to a node."""
200
        return self._maximum_size
201
202
    def set_maximum_size(self, new_size):
203
        """Set the size threshold for nodes.
204
205
        :param new_size: The size at which no data is added to a node. 0 for
206
            unlimited.
207
        """
208
        self._maximum_size = new_size
209
210
211
class LeafNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
212
    """A node containing actual key:value pairs.
213
    
214
    :ivar _items: A dict of key->value items. The key is in tuple form.
215
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
216
217
    def __init__(self):
218
        Node.__init__(self)
219
        # The size of a leaf node with default values and no children.
220
        self._size = 12
221
222
    def _current_size(self):
223
        """Answer the current serialised size of this node."""
224
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
225
            len(str(self._maximum_size)))
226
227
    @classmethod
228
    def deserialise(klass, bytes, key):
229
        """Deserialise bytes, with key key, into a LeafNode.
230
231
        :param bytes: The bytes of the node.
232
        :param key: The key that the serialised node has.
233
        """
234
        result = LeafNode()
235
        lines = bytes.splitlines()
236
        items = {}
237
        if lines[0] != 'chkleaf:':
238
            raise ValueError("not a serialised leaf node: %r" % bytes)
239
        maximum_size = int(lines[1])
240
        width = int(lines[2])
241
        length = int(lines[3])
242
        for line in lines[4:]:
3735.2.25 by Robert Collins
CHKInventory core tests passing.
243
            elements = line.split('\x00', width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
244
            items[tuple(elements[:-1])] = elements[-1]
245
        if len(items) != length:
246
            raise AssertionError("item count mismatch")
247
        result._items = items
248
        result._len = length
249
        result._maximum_size = maximum_size
250
        result._key = key
251
        result._key_width = width
252
        result._size = len(bytes)
253
        return result
254
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
255
    def iteritems(self, store, key_filter=None):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
256
        if key_filter is not None:
257
            for item in self._items.iteritems():
258
                if item[0] in key_filter:
259
                    yield item
260
        else:
261
            for item in self._items.iteritems():
262
                yield item
263
264
    def key(self):
265
        return self._key
266
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
267
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
268
        """Map key to value."""
269
        if key in self._items:
270
            self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
271
            self._len -= 1
272
        self._items[key] = value
273
        self._size += 2 + len('\x00'.join(key)) + len(value)
274
        self._len += 1
275
        self._key = None
276
        if (self._maximum_size and self._current_size() > self._maximum_size and
277
            self._len > 1):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
278
            common_prefix = self.unique_serialised_prefix()
279
            split_at = len(common_prefix) + 1
280
            result = {}
281
            for key, value in self._items.iteritems():
282
                serialised_key = self._serialised_key(key)
283
                prefix = serialised_key[:split_at]
284
                if prefix not in result:
285
                    node = LeafNode()
286
                    node.set_maximum_size(self._maximum_size)
287
                    node._key_width = self._key_width
288
                    result[prefix] = node
289
                else:
290
                    node = result[prefix]
291
                node.map(store, key, value)
292
            return common_prefix, result.items()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
293
        else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
294
            return self.unique_serialised_prefix(), [("", self)]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
295
296
    def serialise(self, store):
297
        """Serialise the tree to store.
298
299
        :param store: A VersionedFiles honouring the CHK extensions.
300
        :return: An iterable of the keys inserted by this operation.
301
        """
302
        lines = ["chkleaf:\n"]
303
        lines.append("%d\n" % self._maximum_size)
304
        lines.append("%d\n" % self._key_width)
305
        lines.append("%d\n" % self._len)
306
        for key, value in sorted(self._items.items()):
307
            lines.append("%s\x00%s\n" % ('\x00'.join(key), value))
308
        sha1, _, _ = store.add_lines((None,), (), lines)
309
        self._key = ("sha1:" + sha1,)
310
        return [self._key]
311
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
312
    def _serialised_key(self, key):
313
        """Return the serialised key for key in this node."""
314
        return '\x00'.join(key)
315
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
316
    def refs(self):
317
        """Return the references to other CHK's held by this node."""
318
        return []
319
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
320
    def unique_serialised_prefix(self):
321
        """Return the unique key prefix for this node.
322
323
        :return: A bytestring of the longest serialised key prefix that is
324
            unique within this node.
325
        """
326
        # may want to cache this eventually :- but wait for enough
327
        # functionality to profile.
328
        keys = list(self._items.keys())
329
        if not keys:
330
            return ""
331
        current_prefix = self._serialised_key(keys.pop(-1))
332
        while current_prefix and keys:
333
            next_key = self._serialised_key(keys.pop(-1))
334
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
335
                if left != right:
336
                    pos -= 1
337
                    break
338
            current_prefix = current_prefix[:pos + 1]
339
        return current_prefix
340
341
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
342
        """Unmap key from the node."""
343
        self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
344
        self._len -= 1
345
        del self._items[key]
346
        self._key = None
347
        return self
348
349
350
class InternalNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
351
    """A node that contains references to other nodes.
352
    
353
    An InternalNode is responsible for mapping serialised key prefixes to child
354
    nodes. It is greedy - it will defer splitting itself as long as possible.
355
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
356
357
    def __init__(self):
358
        Node.__init__(self)
359
        # The size of an internalnode with default values and no children.
360
        # self._size = 12
361
        # How many octets key prefixes within this node are.
362
        self._node_width = 0
363
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
364
    def add_node(self, prefix, node):
365
        """Add a child node with prefix prefix, and node node.
366
367
        :param prefix: The serialised key prefix for node.
368
        :param node: The node being added.
369
        """
370
        self._len += len(node)
371
        if not len(self._items):
372
            self._node_width = len(prefix)
373
        self._items[prefix] = node
374
        self._key = None
375
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
376
    def _current_size(self):
377
        """Answer the current serialised size of this node."""
378
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
379
            len(str(self._maximum_size)))
380
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
381
    @classmethod
382
    def deserialise(klass, bytes, key):
3735.2.25 by Robert Collins
CHKInventory core tests passing.
383
        """Deserialise bytes to an InternalNode, with key key.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
384
385
        :param bytes: The bytes of the node.
386
        :param key: The key that the serialised node has.
387
        :return: An InternalNode instance.
388
        """
389
        result = InternalNode()
390
        lines = bytes.splitlines()
391
        items = {}
392
        if lines[0] != 'chknode:':
393
            raise ValueError("not a serialised internal node: %r" % bytes)
394
        maximum_size = int(lines[1])
395
        width = int(lines[2])
396
        length = int(lines[3])
397
        for line in lines[4:]:
398
            prefix, flat_key = line.rsplit('\x00', 1)
399
            items[prefix] = (flat_key,)
400
        result._items = items
401
        result._len = length
402
        result._maximum_size = maximum_size
403
        result._key = key
404
        result._key_width = width
405
        result._size = len(bytes)
406
        result._node_width = len(prefix)
407
        return result
408
409
    def iteritems(self, store, key_filter=None):
410
        for node in self._iter_nodes(store, key_filter=key_filter):
411
            for item in node.iteritems(store, key_filter=key_filter):
412
                yield item
413
414
    def _iter_nodes(self, store, key_filter=None):
415
        """Iterate over node objects which match key_filter.
416
417
        :param store: A store to use for accessing content.
418
        :param key_filter: A key filter to filter nodes. Only nodes that might
419
            contain a key in key_filter will be returned.
420
        :return: An iterable of nodes.
421
        """
422
        nodes = []
423
        keys = set()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
424
        if key_filter is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
425
            for node in self._items.itervalues():
426
                if type(node) == tuple:
427
                    keys.add(node)
428
                else:
429
                    nodes.append(node)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
430
        else:
431
            serialised_filter = set([self._serialised_key(key) for key in
432
                key_filter])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
433
            for prefix, node in self._items.iteritems():
434
                if prefix in serialised_filter:
435
                    if type(node) == tuple:
436
                        keys.add(node)
437
                    else:
438
                        nodes.append(node)
439
        if keys:
440
            # demand load some pages.
441
            stream = store.get_record_stream(keys, 'unordered', True)
442
            for record in stream:
443
                node = _deserialise(record.get_bytes_as('fulltext'), record.key)
444
                nodes.append(node)
445
        return nodes
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
446
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
447
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
448
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
449
        if not len(self._items):
450
            raise AssertionError("cant map in an empty InternalNode.")
451
        children = self._iter_nodes(store, key_filter=[key])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
452
        serialised_key = self._serialised_key(key)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
453
        if children:
454
            child = children[0]
455
        else:
456
            # new child needed:
457
            child = self._new_child(serialised_key, LeafNode)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
458
        old_len = len(child)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
459
        prefix, node_details = child.map(store, key, value)
460
        if len(node_details) == 1:
461
            # child may have shrunk, or might be the same.
462
            self._len = self._len - old_len + len(child)
463
            self._items[serialised_key] = child
464
            return self.unique_serialised_prefix(), [("", self)]
465
        # child has overflown - create a new intermediate node.
466
        # XXX: This is where we might want to try and expand our depth
467
        # to refer to more bytes of every child (which would give us
468
        # multiple pointers to child nodes, but less intermediate nodes)
469
        child = self._new_child(serialised_key, InternalNode)
470
        for split, node in node_details:
471
            child.add_node(split, node)
472
        self._len = self._len - old_len + len(child)
473
        return self.unique_serialised_prefix(), [("", self)]
474
475
    def _new_child(self, serialised_key, klass):
476
        """Create a new child node of type klass."""
477
        child = klass()
478
        child.set_maximum_size(self._maximum_size)
479
        child._key_width = self._key_width
480
        self._items[serialised_key] = child
481
        return child
482
483
    def serialise(self, store):
484
        """Serialise the node to store.
485
486
        :param store: A VersionedFiles honouring the CHK extensions.
487
        :return: An iterable of the keys inserted by this operation.
488
        """
489
        for node in self._items.itervalues():
490
            if type(node) == tuple:
491
                # Never deserialised.
492
                continue
493
            if node._key is not None:
494
                # Never altered
495
                continue
496
            for key in node.serialise(store):
497
                yield key
498
        lines = ["chknode:\n"]
499
        lines.append("%d\n" % self._maximum_size)
500
        lines.append("%d\n" % self._key_width)
501
        lines.append("%d\n" % self._len)
502
        for prefix, node in sorted(self._items.items()):
503
            if type(node) == tuple:
504
                key = node[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
505
            else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
506
                key = node._key[0]
507
            lines.append("%s\x00%s\n" % (prefix, key))
508
        sha1, _, _ = store.add_lines((None,), (), lines)
509
        self._key = ("sha1:" + sha1,)
510
        yield self._key
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
511
512
    def _serialised_key(self, key):
513
        """Return the serialised key for key in this node."""
514
        return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
515
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
516
    def _split(self, offset):
517
        """Split this node into smaller nodes starting at offset.
518
519
        :param offset: The offset to start the new child nodes at.
520
        :return: An iterable of (prefix, node) tuples. prefix is a byte
521
            prefix for reaching node.
522
        """
523
        if offset >= self._node_width:
524
            for node in self._items.values():
525
                for result in node._split(offset):
526
                    yield result
527
            return
528
        for key, node in self._items.items():
529
            pass
530
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
531
    def refs(self):
532
        """Return the references to other CHK's held by this node."""
533
        if self._key is None:
534
            raise AssertionError("unserialised nodes have no refs.")
535
        refs = []
536
        for value in self._items.itervalues():
537
            if type(value) == tuple:
538
                refs.append(value)
539
            else:
540
                refs.append(value.key())
541
        return refs
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
542
543
    def unique_serialised_prefix(self):
544
        """Return the unique key prefix for this node.
545
546
        :return: A bytestring of the longest serialised key prefix that is
547
            unique within this node.
548
        """
549
        # may want to cache this eventually :- but wait for enough
550
        # functionality to profile.
551
        keys = list(self._items.keys())
552
        if not keys:
553
            return ""
554
        current_prefix = keys.pop(-1)
555
        while current_prefix and keys:
556
            next_key = keys.pop(-1)
557
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
558
                if left != right:
559
                    pos -= 1
560
                    break
561
            current_prefix = current_prefix[:pos + 1]
562
        return current_prefix
563
564
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
565
        """Remove key from this node and it's children."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
566
        if not len(self._items):
567
            raise AssertionError("cant unmap in an empty InternalNode.")
568
        serialised_key = self._serialised_key(key)
569
        children = self._iter_nodes(store, key_filter=[key])
570
        serialised_key = self._serialised_key(key)
571
        if children:
572
            child = children[0]
573
        else:
574
            raise KeyError(key)
575
        self._len -= 1
576
        unmapped = child.unmap(store, key)
577
        if len(unmapped) == 0:
578
            # All child nodes are gone, remove the child:
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
579
            del self._items[serialised_key]
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
580
        else:
581
            # Stash the returned node
582
            self._items[serialised_key] = unmapped
3735.2.23 by Robert Collins
Test unmapping with one child left but multiple keys.
583
        if len(self._items) == 1:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
584
            # this node is no longer needed:
585
            return self._items.values()[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
586
        return self
587
588
3735.2.16 by Robert Collins
Untested extensions to support repodetails
589
def _deserialise(bytes, key):
590
    """Helper for repositorydetails - convert bytes to a node."""
3735.2.24 by Robert Collins
test_chk_map tests all passing.
591
    if bytes.startswith("chkleaf:\n"):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
592
        return LeafNode.deserialise(bytes, key)
593
    elif bytes.startswith("chknode:\n"):
594
        return InternalNode.deserialise(bytes, key)
3735.2.16 by Robert Collins
Untested extensions to support repodetails
595
    else:
596
        raise AssertionError("Unknown node type.")