/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
17
"""Persistent maps from tuple_of_strings->string using CHK stores.
18
19
Overview and current status:
20
21
The CHKMap class implements a dict from tuple_of_strings->string by using a trie
22
with internal nodes of 8-bit fan out; The key tuples are mapped to strings by
23
joining them by \x00, and \x00 padding shorter keys out to the length of the
24
longest key. Leaf nodes are packed as densely as possible, and internal nodes
25
are all and additional 8-bits wide leading to a sparse upper tree. 
26
27
Updates to a CHKMap are done preferentially via the apply_delta method, to
28
allow optimisation of the update operation; but individual map/unmap calls are
29
possible and supported. All changes via map/unmap are buffered in memory until
30
the _save method is called to force serialisation of the tree. apply_delta
31
performs a _save implicitly.
32
33
TODO:
34
-----
35
36
Densely packed upper nodes.
37
38
"""
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
39
40
import osutils
41
42
43
class CHKMap(object):
44
    """A persistent map from string to string backed by a CHK store."""
45
46
    def __init__(self, store, root_key):
47
        """Create a CHKMap object.
48
49
        :param store: The store the CHKMap is stored in.
50
        :param root_key: The root key of the map. None to create an empty
51
            CHKMap.
52
        """
53
        self._store = store
54
        if root_key is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
55
            self._root_node = LeafNode()
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
56
        else:
57
            self._root_node = root_key
58
59
    def apply_delta(self, delta):
60
        """Apply a delta to the map.
61
62
        :param delta: An iterable of old_key, new_key, new_value tuples.
63
            If new_key is not None, then new_key->new_value is inserted
64
            into the map; if old_key is not None, then the old mapping
65
            of old_key is removed.
66
        """
67
        for old, new, value in delta:
68
            if old is not None and old != new:
69
                # unmap
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
70
                self.unmap(old)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
71
        for old, new, value in delta:
72
            if new is not None:
73
                # map
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
74
                self.map(new, value)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
75
        return self._save()
76
77
    def _ensure_root(self):
78
        """Ensure that the root node is an object not a key."""
79
        if type(self._root_node) == tuple:
80
            # Demand-load the root
81
            bytes = self._read_bytes(self._root_node)
82
            root_key = self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
83
            self._root_node = _deserialise(bytes, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
84
85
    def _read_bytes(self, key):
86
        stream = self._store.get_record_stream([key], 'unordered', True)
87
        return stream.next().get_bytes_as('fulltext')
88
89
    @classmethod
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
90
    def from_dict(klass, store, initial_value, maximum_size=0):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
91
        """Create a CHKMap in store with initial_value as the content.
92
        
93
        :param store: The store to record initial_value in, a VersionedFiles
94
            object with 1-tuple keys supporting CHK key generation.
95
        :param initial_value: A dict to store in store. Its keys and values
96
            must be bytestrings.
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
97
        :param maximum_size: The maximum_size rule to apply to nodes. This
98
            determines the size at which no new data is added to a single node.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
99
        :return: The root chk of te resulting CHKMap.
100
        """
101
        result = CHKMap(store, None)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
102
        result._root_node.set_maximum_size(maximum_size)
103
        delta = []
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
104
        for key, value in initial_value.items():
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
105
            delta.append((None, key, value))
106
        result.apply_delta(delta)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
107
        return result._save()
108
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
109
    def iter_changes(self, basis):
110
        """Iterate over the changes between basis and self.
111
112
        :return: An iterator of tuples: (key, old_value, new_value). Old_value
113
            is None for keys only in self; new_value is None for keys only in
114
            basis.
115
        """
116
        # Cheap but let tests pass
117
        new = dict(self.iteritems())
118
        old = dict(basis.iteritems())
119
        new_keys = set(new)
120
        old_keys = set(old)
121
        result = []
122
        for key in new_keys - old_keys:
123
            result.append((key, None, new[key]))
124
        for key in old_keys - new_keys:
125
            result.append((key, old[key], None))
126
        for key in old_keys.intersection(new_keys):
127
            if new[key] != old[key]:
128
                result.append((key, old[key], new[key]))
129
        return result
130
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
131
    def iteritems(self, key_filter=None):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
132
        """Iterate over the entire CHKMap's contents."""
133
        self._ensure_root()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
134
        return self._root_node.iteritems(self._store, key_filter=key_filter)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
135
3735.2.12 by Robert Collins
Implement commit-via-deltas for split inventory repositories.
136
    def key(self):
137
        """Return the key for this map."""
138
        if isinstance(self._root_node, tuple):
139
            return self._root_node
140
        else:
141
            return self._root_node._key
142
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
143
    def __len__(self):
144
        self._ensure_root()
145
        return len(self._root_node)
146
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
147
    def map(self, key, value):
148
        """Map a key tuple to value."""
149
        # Need a root object.
150
        self._ensure_root()
151
        prefix, node_details = self._root_node.map(self._store, key, value)
152
        if len(node_details) == 1:
153
            self._root_node = node_details[0][1]
154
        else:
155
            self._root_node = InternalNode()
156
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
157
            self._root_node._key_width = node_details[0][1]._key_width
158
            for split, node in node_details:
159
                self._root_node.add_node(split, node)
160
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
161
    def _map(self, key, value):
162
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
163
        # Ne
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
164
        self._ensure_root()
165
        # Store the value
166
        bytes = ValueNode(value).serialise()
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
167
        # Experimental code to probe for keys rather than just adding; its not
168
        # clear if it is an improvement.
169
        #chk = ("sha1:%s" % osutils.sha_string(bytes),)
170
        #if not self._store.get_parent_map([key]):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
171
        sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes))
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
172
        chk = ("sha1:" + sha1,)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
173
        # And link into the root
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
174
        self._root_node.add_child(key, chk)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
175
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
176
    def unmap(self, key):
177
        """remove key from the map."""
178
        self._ensure_root()
179
        self._root_node.unmap(self._store, key)
180
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
181
    def _unmap(self, key):
182
        """remove key from the map."""
183
        self._ensure_root()
184
        self._root_node.remove_child(key)
185
186
    def _save(self):
187
        """Save the map completely.
188
189
        :return: The key of the root node.
190
        """
191
        if type(self._root_node) == tuple:
192
            # Already saved.
193
            return self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
194
        keys = list(self._root_node.serialise(self._store))
195
        return keys[-1]
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
196
197
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
198
class Node(object):
199
    """Base class defining the protocol for CHK Map nodes."""
200
201
    def __init__(self, key_width=1):
202
        """Create a node.
203
204
        :param key_width: The width of keys for this node.
205
        """
206
        self._key = None
207
        # Current number of elements
208
        self._len = 0
209
        self._maximum_size = 0
210
        self._key_width = 1
211
        # current size in bytes
212
        self._size = 0
213
        # The pointers/values this node has - meaning defined by child classes.
214
        self._items = {}
215
216
    def __len__(self):
217
        return self._len
218
219
    @property
220
    def maximum_size(self):
221
        """What is the upper limit for adding references to a node."""
222
        return self._maximum_size
223
224
    def set_maximum_size(self, new_size):
225
        """Set the size threshold for nodes.
226
227
        :param new_size: The size at which no data is added to a node. 0 for
228
            unlimited.
229
        """
230
        self._maximum_size = new_size
231
232
233
class LeafNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
234
    """A node containing actual key:value pairs.
235
    
236
    :ivar _items: A dict of key->value items. The key is in tuple form.
237
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
238
239
    def __init__(self):
240
        Node.__init__(self)
241
        # The size of a leaf node with default values and no children.
242
        self._size = 12
243
244
    def _current_size(self):
245
        """Answer the current serialised size of this node."""
246
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
247
            len(str(self._maximum_size)))
248
249
    @classmethod
250
    def deserialise(klass, bytes, key):
251
        """Deserialise bytes, with key key, into a LeafNode.
252
253
        :param bytes: The bytes of the node.
254
        :param key: The key that the serialised node has.
255
        """
256
        result = LeafNode()
257
        lines = bytes.splitlines()
258
        items = {}
259
        if lines[0] != 'chkleaf:':
260
            raise ValueError("not a serialised leaf node: %r" % bytes)
261
        maximum_size = int(lines[1])
262
        width = int(lines[2])
263
        length = int(lines[3])
264
        for line in lines[4:]:
3735.2.25 by Robert Collins
CHKInventory core tests passing.
265
            elements = line.split('\x00', width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
266
            items[tuple(elements[:-1])] = elements[-1]
267
        if len(items) != length:
268
            raise AssertionError("item count mismatch")
269
        result._items = items
270
        result._len = length
271
        result._maximum_size = maximum_size
272
        result._key = key
273
        result._key_width = width
274
        result._size = len(bytes)
275
        return result
276
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
277
    def iteritems(self, store, key_filter=None):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
278
        if key_filter is not None:
279
            for item in self._items.iteritems():
280
                if item[0] in key_filter:
281
                    yield item
282
        else:
283
            for item in self._items.iteritems():
284
                yield item
285
286
    def key(self):
287
        return self._key
288
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
289
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
290
        """Map key to value."""
291
        if key in self._items:
292
            self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
293
            self._len -= 1
294
        self._items[key] = value
295
        self._size += 2 + len('\x00'.join(key)) + len(value)
296
        self._len += 1
297
        self._key = None
298
        if (self._maximum_size and self._current_size() > self._maximum_size and
299
            self._len > 1):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
300
            common_prefix = self.unique_serialised_prefix()
301
            split_at = len(common_prefix) + 1
302
            result = {}
303
            for key, value in self._items.iteritems():
304
                serialised_key = self._serialised_key(key)
305
                prefix = serialised_key[:split_at]
306
                if prefix not in result:
307
                    node = LeafNode()
308
                    node.set_maximum_size(self._maximum_size)
309
                    node._key_width = self._key_width
310
                    result[prefix] = node
311
                else:
312
                    node = result[prefix]
313
                node.map(store, key, value)
314
            return common_prefix, result.items()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
315
        else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
316
            return self.unique_serialised_prefix(), [("", self)]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
317
318
    def serialise(self, store):
319
        """Serialise the tree to store.
320
321
        :param store: A VersionedFiles honouring the CHK extensions.
322
        :return: An iterable of the keys inserted by this operation.
323
        """
324
        lines = ["chkleaf:\n"]
325
        lines.append("%d\n" % self._maximum_size)
326
        lines.append("%d\n" % self._key_width)
327
        lines.append("%d\n" % self._len)
328
        for key, value in sorted(self._items.items()):
329
            lines.append("%s\x00%s\n" % ('\x00'.join(key), value))
330
        sha1, _, _ = store.add_lines((None,), (), lines)
331
        self._key = ("sha1:" + sha1,)
332
        return [self._key]
333
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
334
    def _serialised_key(self, key):
335
        """Return the serialised key for key in this node."""
336
        return '\x00'.join(key)
337
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
338
    def refs(self):
339
        """Return the references to other CHK's held by this node."""
340
        return []
341
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
342
    def unique_serialised_prefix(self):
343
        """Return the unique key prefix for this node.
344
345
        :return: A bytestring of the longest serialised key prefix that is
346
            unique within this node.
347
        """
348
        # may want to cache this eventually :- but wait for enough
349
        # functionality to profile.
350
        keys = list(self._items.keys())
351
        if not keys:
352
            return ""
353
        current_prefix = self._serialised_key(keys.pop(-1))
354
        while current_prefix and keys:
355
            next_key = self._serialised_key(keys.pop(-1))
356
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
357
                if left != right:
358
                    pos -= 1
359
                    break
360
            current_prefix = current_prefix[:pos + 1]
361
        return current_prefix
362
363
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
364
        """Unmap key from the node."""
365
        self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
366
        self._len -= 1
367
        del self._items[key]
368
        self._key = None
369
        return self
370
371
372
class InternalNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
373
    """A node that contains references to other nodes.
374
    
375
    An InternalNode is responsible for mapping serialised key prefixes to child
376
    nodes. It is greedy - it will defer splitting itself as long as possible.
377
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
378
379
    def __init__(self):
380
        Node.__init__(self)
381
        # The size of an internalnode with default values and no children.
382
        # self._size = 12
383
        # How many octets key prefixes within this node are.
384
        self._node_width = 0
385
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
386
    def add_node(self, prefix, node):
387
        """Add a child node with prefix prefix, and node node.
388
389
        :param prefix: The serialised key prefix for node.
390
        :param node: The node being added.
391
        """
392
        self._len += len(node)
393
        if not len(self._items):
394
            self._node_width = len(prefix)
395
        self._items[prefix] = node
396
        self._key = None
397
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
398
    def _current_size(self):
399
        """Answer the current serialised size of this node."""
400
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
401
            len(str(self._maximum_size)))
402
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
403
    @classmethod
404
    def deserialise(klass, bytes, key):
3735.2.25 by Robert Collins
CHKInventory core tests passing.
405
        """Deserialise bytes to an InternalNode, with key key.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
406
407
        :param bytes: The bytes of the node.
408
        :param key: The key that the serialised node has.
409
        :return: An InternalNode instance.
410
        """
411
        result = InternalNode()
412
        lines = bytes.splitlines()
413
        items = {}
414
        if lines[0] != 'chknode:':
415
            raise ValueError("not a serialised internal node: %r" % bytes)
416
        maximum_size = int(lines[1])
417
        width = int(lines[2])
418
        length = int(lines[3])
419
        for line in lines[4:]:
420
            prefix, flat_key = line.rsplit('\x00', 1)
421
            items[prefix] = (flat_key,)
422
        result._items = items
423
        result._len = length
424
        result._maximum_size = maximum_size
425
        result._key = key
426
        result._key_width = width
427
        result._size = len(bytes)
428
        result._node_width = len(prefix)
429
        return result
430
431
    def iteritems(self, store, key_filter=None):
432
        for node in self._iter_nodes(store, key_filter=key_filter):
433
            for item in node.iteritems(store, key_filter=key_filter):
434
                yield item
435
436
    def _iter_nodes(self, store, key_filter=None):
437
        """Iterate over node objects which match key_filter.
438
439
        :param store: A store to use for accessing content.
440
        :param key_filter: A key filter to filter nodes. Only nodes that might
441
            contain a key in key_filter will be returned.
442
        :return: An iterable of nodes.
443
        """
444
        nodes = []
445
        keys = set()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
446
        if key_filter is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
447
            for node in self._items.itervalues():
448
                if type(node) == tuple:
449
                    keys.add(node)
450
                else:
451
                    nodes.append(node)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
452
        else:
453
            serialised_filter = set([self._serialised_key(key) for key in
454
                key_filter])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
455
            for prefix, node in self._items.iteritems():
456
                if prefix in serialised_filter:
457
                    if type(node) == tuple:
458
                        keys.add(node)
459
                    else:
460
                        nodes.append(node)
461
        if keys:
462
            # demand load some pages.
463
            stream = store.get_record_stream(keys, 'unordered', True)
464
            for record in stream:
465
                node = _deserialise(record.get_bytes_as('fulltext'), record.key)
466
                nodes.append(node)
467
        return nodes
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
468
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
469
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
470
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
471
        if not len(self._items):
472
            raise AssertionError("cant map in an empty InternalNode.")
473
        children = self._iter_nodes(store, key_filter=[key])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
474
        serialised_key = self._serialised_key(key)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
475
        if children:
476
            child = children[0]
477
        else:
478
            # new child needed:
479
            child = self._new_child(serialised_key, LeafNode)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
480
        old_len = len(child)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
481
        prefix, node_details = child.map(store, key, value)
482
        if len(node_details) == 1:
483
            # child may have shrunk, or might be the same.
484
            self._len = self._len - old_len + len(child)
485
            self._items[serialised_key] = child
3735.2.29 by Robert Collins
Untested code is broken code.
486
            self._key = None
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
487
            return self.unique_serialised_prefix(), [("", self)]
488
        # child has overflown - create a new intermediate node.
489
        # XXX: This is where we might want to try and expand our depth
490
        # to refer to more bytes of every child (which would give us
491
        # multiple pointers to child nodes, but less intermediate nodes)
492
        child = self._new_child(serialised_key, InternalNode)
493
        for split, node in node_details:
494
            child.add_node(split, node)
495
        self._len = self._len - old_len + len(child)
3735.2.29 by Robert Collins
Untested code is broken code.
496
        self._key = None
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
497
        return self.unique_serialised_prefix(), [("", self)]
498
499
    def _new_child(self, serialised_key, klass):
500
        """Create a new child node of type klass."""
501
        child = klass()
502
        child.set_maximum_size(self._maximum_size)
503
        child._key_width = self._key_width
504
        self._items[serialised_key] = child
505
        return child
506
507
    def serialise(self, store):
508
        """Serialise the node to store.
509
510
        :param store: A VersionedFiles honouring the CHK extensions.
511
        :return: An iterable of the keys inserted by this operation.
512
        """
513
        for node in self._items.itervalues():
514
            if type(node) == tuple:
515
                # Never deserialised.
516
                continue
517
            if node._key is not None:
518
                # Never altered
519
                continue
520
            for key in node.serialise(store):
521
                yield key
522
        lines = ["chknode:\n"]
523
        lines.append("%d\n" % self._maximum_size)
524
        lines.append("%d\n" % self._key_width)
525
        lines.append("%d\n" % self._len)
526
        for prefix, node in sorted(self._items.items()):
527
            if type(node) == tuple:
528
                key = node[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
529
            else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
530
                key = node._key[0]
531
            lines.append("%s\x00%s\n" % (prefix, key))
532
        sha1, _, _ = store.add_lines((None,), (), lines)
533
        self._key = ("sha1:" + sha1,)
534
        yield self._key
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
535
536
    def _serialised_key(self, key):
537
        """Return the serialised key for key in this node."""
538
        return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
539
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
540
    def _split(self, offset):
541
        """Split this node into smaller nodes starting at offset.
542
543
        :param offset: The offset to start the new child nodes at.
544
        :return: An iterable of (prefix, node) tuples. prefix is a byte
545
            prefix for reaching node.
546
        """
547
        if offset >= self._node_width:
548
            for node in self._items.values():
549
                for result in node._split(offset):
550
                    yield result
551
            return
552
        for key, node in self._items.items():
553
            pass
554
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
555
    def refs(self):
556
        """Return the references to other CHK's held by this node."""
557
        if self._key is None:
558
            raise AssertionError("unserialised nodes have no refs.")
559
        refs = []
560
        for value in self._items.itervalues():
561
            if type(value) == tuple:
562
                refs.append(value)
563
            else:
564
                refs.append(value.key())
565
        return refs
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
566
567
    def unique_serialised_prefix(self):
568
        """Return the unique key prefix for this node.
569
570
        :return: A bytestring of the longest serialised key prefix that is
571
            unique within this node.
572
        """
573
        # may want to cache this eventually :- but wait for enough
574
        # functionality to profile.
575
        keys = list(self._items.keys())
576
        if not keys:
577
            return ""
578
        current_prefix = keys.pop(-1)
579
        while current_prefix and keys:
580
            next_key = keys.pop(-1)
581
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
582
                if left != right:
583
                    pos -= 1
584
                    break
585
            current_prefix = current_prefix[:pos + 1]
586
        return current_prefix
587
588
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
589
        """Remove key from this node and it's children."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
590
        if not len(self._items):
591
            raise AssertionError("cant unmap in an empty InternalNode.")
592
        serialised_key = self._serialised_key(key)
593
        children = self._iter_nodes(store, key_filter=[key])
594
        serialised_key = self._serialised_key(key)
595
        if children:
596
            child = children[0]
597
        else:
598
            raise KeyError(key)
599
        self._len -= 1
600
        unmapped = child.unmap(store, key)
601
        if len(unmapped) == 0:
602
            # All child nodes are gone, remove the child:
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
603
            del self._items[serialised_key]
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
604
        else:
605
            # Stash the returned node
606
            self._items[serialised_key] = unmapped
3735.2.23 by Robert Collins
Test unmapping with one child left but multiple keys.
607
        if len(self._items) == 1:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
608
            # this node is no longer needed:
609
            return self._items.values()[0]
3735.2.29 by Robert Collins
Untested code is broken code.
610
        self._key = None
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
611
        return self
612
613
3735.2.16 by Robert Collins
Untested extensions to support repodetails
614
def _deserialise(bytes, key):
615
    """Helper for repositorydetails - convert bytes to a node."""
3735.2.24 by Robert Collins
test_chk_map tests all passing.
616
    if bytes.startswith("chkleaf:\n"):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
617
        return LeafNode.deserialise(bytes, key)
618
    elif bytes.startswith("chknode:\n"):
619
        return InternalNode.deserialise(bytes, key)
3735.2.16 by Robert Collins
Untested extensions to support repodetails
620
    else:
621
        raise AssertionError("Unknown node type.")