/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
17
"""Persistent maps from tuple_of_strings->string using CHK stores.
18
19
Overview and current status:
20
21
The CHKMap class implements a dict from tuple_of_strings->string by using a trie
22
with internal nodes of 8-bit fan out; The key tuples are mapped to strings by
23
joining them by \x00, and \x00 padding shorter keys out to the length of the
24
longest key. Leaf nodes are packed as densely as possible, and internal nodes
25
are all and additional 8-bits wide leading to a sparse upper tree. 
26
27
Updates to a CHKMap are done preferentially via the apply_delta method, to
28
allow optimisation of the update operation; but individual map/unmap calls are
29
possible and supported. All changes via map/unmap are buffered in memory until
30
the _save method is called to force serialisation of the tree. apply_delta
31
performs a _save implicitly.
32
33
TODO:
34
-----
35
36
Densely packed upper nodes.
37
38
"""
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
39
40
import osutils
41
42
43
class CHKMap(object):
44
    """A persistent map from string to string backed by a CHK store."""
45
46
    def __init__(self, store, root_key):
47
        """Create a CHKMap object.
48
49
        :param store: The store the CHKMap is stored in.
50
        :param root_key: The root key of the map. None to create an empty
51
            CHKMap.
52
        """
53
        self._store = store
54
        if root_key is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
55
            self._root_node = LeafNode()
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
56
        else:
57
            self._root_node = root_key
58
59
    def apply_delta(self, delta):
60
        """Apply a delta to the map.
61
62
        :param delta: An iterable of old_key, new_key, new_value tuples.
63
            If new_key is not None, then new_key->new_value is inserted
64
            into the map; if old_key is not None, then the old mapping
65
            of old_key is removed.
66
        """
67
        for old, new, value in delta:
68
            if old is not None and old != new:
69
                # unmap
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
70
                self.unmap(old)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
71
        for old, new, value in delta:
72
            if new is not None:
73
                # map
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
74
                self.map(new, value)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
75
        return self._save()
76
77
    def _ensure_root(self):
78
        """Ensure that the root node is an object not a key."""
79
        if type(self._root_node) == tuple:
80
            # Demand-load the root
81
            bytes = self._read_bytes(self._root_node)
82
            root_key = self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
83
            self._root_node = _deserialise(bytes, root_key)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
84
85
    def _read_bytes(self, key):
86
        stream = self._store.get_record_stream([key], 'unordered', True)
87
        return stream.next().get_bytes_as('fulltext')
88
89
    @classmethod
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
90
    def from_dict(klass, store, initial_value, maximum_size=0):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
91
        """Create a CHKMap in store with initial_value as the content.
92
        
93
        :param store: The store to record initial_value in, a VersionedFiles
94
            object with 1-tuple keys supporting CHK key generation.
95
        :param initial_value: A dict to store in store. Its keys and values
96
            must be bytestrings.
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
97
        :param maximum_size: The maximum_size rule to apply to nodes. This
98
            determines the size at which no new data is added to a single node.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
99
        :return: The root chk of te resulting CHKMap.
100
        """
101
        result = CHKMap(store, None)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
102
        result._root_node.set_maximum_size(maximum_size)
103
        delta = []
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
104
        for key, value in initial_value.items():
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
105
            delta.append((None, key, value))
106
        result.apply_delta(delta)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
107
        return result._save()
108
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
109
    def iteritems(self, key_filter=None):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
110
        """Iterate over the entire CHKMap's contents."""
111
        self._ensure_root()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
112
        return self._root_node.iteritems(self._store, key_filter=key_filter)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
113
3735.2.12 by Robert Collins
Implement commit-via-deltas for split inventory repositories.
114
    def key(self):
115
        """Return the key for this map."""
116
        if isinstance(self._root_node, tuple):
117
            return self._root_node
118
        else:
119
            return self._root_node._key
120
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
121
    def __len__(self):
122
        self._ensure_root()
123
        return len(self._root_node)
124
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
125
    def map(self, key, value):
126
        """Map a key tuple to value."""
127
        # Need a root object.
128
        self._ensure_root()
129
        prefix, node_details = self._root_node.map(self._store, key, value)
130
        if len(node_details) == 1:
131
            self._root_node = node_details[0][1]
132
        else:
133
            self._root_node = InternalNode()
134
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
135
            self._root_node._key_width = node_details[0][1]._key_width
136
            for split, node in node_details:
137
                self._root_node.add_node(split, node)
138
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
139
    def _map(self, key, value):
140
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
141
        # Ne
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
142
        self._ensure_root()
143
        # Store the value
144
        bytes = ValueNode(value).serialise()
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
145
        # Experimental code to probe for keys rather than just adding; its not
146
        # clear if it is an improvement.
147
        #chk = ("sha1:%s" % osutils.sha_string(bytes),)
148
        #if not self._store.get_parent_map([key]):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
149
        sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes))
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
150
        chk = ("sha1:" + sha1,)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
151
        # And link into the root
3735.2.15 by Robert Collins
More direct implementation of fetch between different serializers.
152
        self._root_node.add_child(key, chk)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
153
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
154
    def unmap(self, key):
155
        """remove key from the map."""
156
        self._ensure_root()
157
        self._root_node.unmap(self._store, key)
158
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
159
    def _unmap(self, key):
160
        """remove key from the map."""
161
        self._ensure_root()
162
        self._root_node.remove_child(key)
163
164
    def _save(self):
165
        """Save the map completely.
166
167
        :return: The key of the root node.
168
        """
169
        if type(self._root_node) == tuple:
170
            # Already saved.
171
            return self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
172
        keys = list(self._root_node.serialise(self._store))
173
        return keys[-1]
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
174
175
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
176
class Node(object):
177
    """Base class defining the protocol for CHK Map nodes."""
178
179
    def __init__(self, key_width=1):
180
        """Create a node.
181
182
        :param key_width: The width of keys for this node.
183
        """
184
        self._key = None
185
        # Current number of elements
186
        self._len = 0
187
        self._maximum_size = 0
188
        self._key_width = 1
189
        # current size in bytes
190
        self._size = 0
191
        # The pointers/values this node has - meaning defined by child classes.
192
        self._items = {}
193
194
    def __len__(self):
195
        return self._len
196
197
    @property
198
    def maximum_size(self):
199
        """What is the upper limit for adding references to a node."""
200
        return self._maximum_size
201
202
    def set_maximum_size(self, new_size):
203
        """Set the size threshold for nodes.
204
205
        :param new_size: The size at which no data is added to a node. 0 for
206
            unlimited.
207
        """
208
        self._maximum_size = new_size
209
210
211
class LeafNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
212
    """A node containing actual key:value pairs.
213
    
214
    :ivar _items: A dict of key->value items. The key is in tuple form.
215
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
216
217
    def __init__(self):
218
        Node.__init__(self)
219
        # The size of a leaf node with default values and no children.
220
        self._size = 12
221
222
    def _current_size(self):
223
        """Answer the current serialised size of this node."""
224
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
225
            len(str(self._maximum_size)))
226
227
    @classmethod
228
    def deserialise(klass, bytes, key):
229
        """Deserialise bytes, with key key, into a LeafNode.
230
231
        :param bytes: The bytes of the node.
232
        :param key: The key that the serialised node has.
233
        """
234
        result = LeafNode()
235
        lines = bytes.splitlines()
236
        items = {}
237
        if lines[0] != 'chkleaf:':
238
            raise ValueError("not a serialised leaf node: %r" % bytes)
239
        maximum_size = int(lines[1])
240
        width = int(lines[2])
241
        length = int(lines[3])
242
        for line in lines[4:]:
243
            elements = line.split('\x00')
244
            items[tuple(elements[:-1])] = elements[-1]
245
        if len(items) != length:
246
            raise AssertionError("item count mismatch")
247
        result._items = items
248
        result._len = length
249
        result._maximum_size = maximum_size
250
        result._key = key
251
        result._key_width = width
252
        result._size = len(bytes)
253
        return result
254
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
255
    def iteritems(self, store, key_filter=None):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
256
        if key_filter is not None:
257
            for item in self._items.iteritems():
258
                if item[0] in key_filter:
259
                    yield item
260
        else:
261
            for item in self._items.iteritems():
262
                yield item
263
264
    def key(self):
265
        return self._key
266
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
267
    def _key_count(self, parent_width, cut_width):
268
        """Return the number of keys in/under this node between two widths.
269
270
        :param parent_width: The start offset in keys to consider.
271
        :param cut_width: The width to stop considering at.
272
        """
273
        # This assumes the keys are unique up to parent_width.
274
        serialised_keys = set()
275
        for key in self._items:
276
            serialised_key = '\x00'.join(key)
277
            serialised_keys.add(serialised_key[parent_width:cut_width])
278
        return len(serialised_keys)
279
280
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
281
        """Map key to value."""
282
        if key in self._items:
283
            self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
284
            self._len -= 1
285
        self._items[key] = value
286
        self._size += 2 + len('\x00'.join(key)) + len(value)
287
        self._len += 1
288
        self._key = None
289
        if (self._maximum_size and self._current_size() > self._maximum_size and
290
            self._len > 1):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
291
            common_prefix = self.unique_serialised_prefix()
292
            split_at = len(common_prefix) + 1
293
            result = {}
294
            for key, value in self._items.iteritems():
295
                serialised_key = self._serialised_key(key)
296
                prefix = serialised_key[:split_at]
297
                if prefix not in result:
298
                    node = LeafNode()
299
                    node.set_maximum_size(self._maximum_size)
300
                    node._key_width = self._key_width
301
                    result[prefix] = node
302
                else:
303
                    node = result[prefix]
304
                node.map(store, key, value)
305
            return common_prefix, result.items()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
306
        else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
307
            return self.unique_serialised_prefix(), [("", self)]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
308
309
    def serialise(self, store):
310
        """Serialise the tree to store.
311
312
        :param store: A VersionedFiles honouring the CHK extensions.
313
        :return: An iterable of the keys inserted by this operation.
314
        """
315
        lines = ["chkleaf:\n"]
316
        lines.append("%d\n" % self._maximum_size)
317
        lines.append("%d\n" % self._key_width)
318
        lines.append("%d\n" % self._len)
319
        for key, value in sorted(self._items.items()):
320
            lines.append("%s\x00%s\n" % ('\x00'.join(key), value))
321
        sha1, _, _ = store.add_lines((None,), (), lines)
322
        self._key = ("sha1:" + sha1,)
323
        return [self._key]
324
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
325
    def _serialised_key(self, key):
326
        """Return the serialised key for key in this node."""
327
        return '\x00'.join(key)
328
329
    def unique_serialised_prefix(self):
330
        """Return the unique key prefix for this node.
331
332
        :return: A bytestring of the longest serialised key prefix that is
333
            unique within this node.
334
        """
335
        # may want to cache this eventually :- but wait for enough
336
        # functionality to profile.
337
        keys = list(self._items.keys())
338
        if not keys:
339
            return ""
340
        current_prefix = self._serialised_key(keys.pop(-1))
341
        while current_prefix and keys:
342
            next_key = self._serialised_key(keys.pop(-1))
343
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
344
                if left != right:
345
                    pos -= 1
346
                    break
347
            current_prefix = current_prefix[:pos + 1]
348
        return current_prefix
349
350
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
351
        """Unmap key from the node."""
352
        self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
353
        self._len -= 1
354
        del self._items[key]
355
        self._key = None
356
        return self
357
358
359
class InternalNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
360
    """A node that contains references to other nodes.
361
    
362
    An InternalNode is responsible for mapping serialised key prefixes to child
363
    nodes. It is greedy - it will defer splitting itself as long as possible.
364
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
365
366
    def __init__(self):
367
        Node.__init__(self)
368
        # The size of an internalnode with default values and no children.
369
        # self._size = 12
370
        # How many octets key prefixes within this node are.
371
        self._node_width = 0
372
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
373
    def add_node(self, prefix, node):
374
        """Add a child node with prefix prefix, and node node.
375
376
        :param prefix: The serialised key prefix for node.
377
        :param node: The node being added.
378
        """
379
        self._len += len(node)
380
        if not len(self._items):
381
            self._node_width = len(prefix)
382
        self._items[prefix] = node
383
        self._key = None
384
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
385
    def _add_node(self, node):
386
        """Add a node to the InternalNode.
387
388
        :param node: An existing node to add. The node will be examined to see
389
            if it is over or undersized and rebalanced if needed across this
390
            nodes children.
391
        """
392
        if self._len == 0:
393
            # new tree level, we're being populated by upspill from a overfull
394
            # tree.
395
            # Cheap-to-code-but-slow?
396
            elements = {}
397
            max_width = 0
398
            # suck in all the values
399
            for key, value in node.iteritems():
400
                # We work on the serialised keys
401
                serialised_key = '\x00'.join(key)
402
                elements[serialised_key] = (key, value)
403
                max_width = max(len(serialised_key), max_width)
404
            # Determine the maximum common key width we will internally handle.
405
            # Start with the full key width; if that exceeds our node size
406
            # shrink it until we are within the node limit.
407
            self._node_width = max_width
408
            width = self._node_width
409
            # Populate all the resulting keys:
410
            items = self._items
411
            for serialised_key, key_value in elements.iteritems():
412
                actual_key = self._serialised_key(key_value[0])
413
                child = items.get(actual_key, None)
414
                if not child:
415
                    child = LeafNode()
416
                    child.set_maximum_size(self._maximum_size)
417
                    child._key_width = self._key_width
418
                    items[actual_key] = child
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
419
                child.map(store, key_value[0], key_value[1])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
420
                self._len += 1
421
        else:
422
            raise NotImplementedError(self._add_node)
423
424
    def _current_size(self):
425
        """Answer the current serialised size of this node."""
426
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
427
            len(str(self._maximum_size)))
428
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
429
    @classmethod
430
    def deserialise(klass, bytes, key):
431
        """Deseriaise bytes to an InternalNode, with key key.
432
433
        :param bytes: The bytes of the node.
434
        :param key: The key that the serialised node has.
435
        :return: An InternalNode instance.
436
        """
437
        result = InternalNode()
438
        lines = bytes.splitlines()
439
        items = {}
440
        if lines[0] != 'chknode:':
441
            raise ValueError("not a serialised internal node: %r" % bytes)
442
        maximum_size = int(lines[1])
443
        width = int(lines[2])
444
        length = int(lines[3])
445
        for line in lines[4:]:
446
            prefix, flat_key = line.rsplit('\x00', 1)
447
            items[prefix] = (flat_key,)
448
        result._items = items
449
        result._len = length
450
        result._maximum_size = maximum_size
451
        result._key = key
452
        result._key_width = width
453
        result._size = len(bytes)
454
        result._node_width = len(prefix)
455
        return result
456
457
    def iteritems(self, store, key_filter=None):
458
        for node in self._iter_nodes(store, key_filter=key_filter):
459
            for item in node.iteritems(store, key_filter=key_filter):
460
                yield item
461
462
    def _iter_nodes(self, store, key_filter=None):
463
        """Iterate over node objects which match key_filter.
464
465
        :param store: A store to use for accessing content.
466
        :param key_filter: A key filter to filter nodes. Only nodes that might
467
            contain a key in key_filter will be returned.
468
        :return: An iterable of nodes.
469
        """
470
        nodes = []
471
        keys = set()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
472
        if key_filter is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
473
            for node in self._items.itervalues():
474
                if type(node) == tuple:
475
                    keys.add(node)
476
                else:
477
                    nodes.append(node)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
478
        else:
479
            serialised_filter = set([self._serialised_key(key) for key in
480
                key_filter])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
481
            for prefix, node in self._items.iteritems():
482
                if prefix in serialised_filter:
483
                    if type(node) == tuple:
484
                        keys.add(node)
485
                    else:
486
                        nodes.append(node)
487
        if keys:
488
            # demand load some pages.
489
            stream = store.get_record_stream(keys, 'unordered', True)
490
            for record in stream:
491
                node = _deserialise(record.get_bytes_as('fulltext'), record.key)
492
                nodes.append(node)
493
        return nodes
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
494
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
495
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
496
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
497
        if not len(self._items):
498
            raise AssertionError("cant map in an empty InternalNode.")
499
        children = self._iter_nodes(store, key_filter=[key])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
500
        serialised_key = self._serialised_key(key)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
501
        if children:
502
            child = children[0]
503
        else:
504
            # new child needed:
505
            child = self._new_child(serialised_key, LeafNode)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
506
        old_len = len(child)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
507
        prefix, node_details = child.map(store, key, value)
508
        if len(node_details) == 1:
509
            # child may have shrunk, or might be the same.
510
            self._len = self._len - old_len + len(child)
511
            self._items[serialised_key] = child
512
            return self.unique_serialised_prefix(), [("", self)]
513
        # child has overflown - create a new intermediate node.
514
        # XXX: This is where we might want to try and expand our depth
515
        # to refer to more bytes of every child (which would give us
516
        # multiple pointers to child nodes, but less intermediate nodes)
517
        child = self._new_child(serialised_key, InternalNode)
518
        for split, node in node_details:
519
            child.add_node(split, node)
520
        self._len = self._len - old_len + len(child)
521
        return self.unique_serialised_prefix(), [("", self)]
522
523
    def _new_child(self, serialised_key, klass):
524
        """Create a new child node of type klass."""
525
        child = klass()
526
        child.set_maximum_size(self._maximum_size)
527
        child._key_width = self._key_width
528
        self._items[serialised_key] = child
529
        return child
530
531
    def serialise(self, store):
532
        """Serialise the node to store.
533
534
        :param store: A VersionedFiles honouring the CHK extensions.
535
        :return: An iterable of the keys inserted by this operation.
536
        """
537
        for node in self._items.itervalues():
538
            if type(node) == tuple:
539
                # Never deserialised.
540
                continue
541
            if node._key is not None:
542
                # Never altered
543
                continue
544
            for key in node.serialise(store):
545
                yield key
546
        lines = ["chknode:\n"]
547
        lines.append("%d\n" % self._maximum_size)
548
        lines.append("%d\n" % self._key_width)
549
        lines.append("%d\n" % self._len)
550
        for prefix, node in sorted(self._items.items()):
551
            if type(node) == tuple:
552
                key = node[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
553
            else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
554
                key = node._key[0]
555
            lines.append("%s\x00%s\n" % (prefix, key))
556
        sha1, _, _ = store.add_lines((None,), (), lines)
557
        self._key = ("sha1:" + sha1,)
558
        yield self._key
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
559
560
    def _serialised_key(self, key):
561
        """Return the serialised key for key in this node."""
562
        return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
563
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
564
    def _split(self, offset):
565
        """Split this node into smaller nodes starting at offset.
566
567
        :param offset: The offset to start the new child nodes at.
568
        :return: An iterable of (prefix, node) tuples. prefix is a byte
569
            prefix for reaching node.
570
        """
571
        if offset >= self._node_width:
572
            for node in self._items.values():
573
                for result in node._split(offset):
574
                    yield result
575
            return
576
        for key, node in self._items.items():
577
            pass
578
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
579
    def _key_count(self, parent_width, cut_width):
580
        """Return the number of keys in/under this node between two widths.
581
582
        :param parent_width: The start offset in keys to consider.
583
        :param cut_width: The width to stop considering at.
584
        """
585
        if cut_width > self._node_width:
586
            raise NotImplementedError(self._key_count)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
587
        # This assumes the keys are unique up to parent_width.
588
        serialised_keys = set()
589
        for serialised_key in self._items:
590
            serialised_keys.add(serialised_key[parent_width:cut_width])
591
        return len(serialised_keys)
592
593
    def _prelude_size(self):
594
        """Return the size of the node prelude."""
595
        return 15
596
597
    def unique_serialised_prefix(self):
598
        """Return the unique key prefix for this node.
599
600
        :return: A bytestring of the longest serialised key prefix that is
601
            unique within this node.
602
        """
603
        # may want to cache this eventually :- but wait for enough
604
        # functionality to profile.
605
        keys = list(self._items.keys())
606
        if not keys:
607
            return ""
608
        current_prefix = keys.pop(-1)
609
        while current_prefix and keys:
610
            next_key = keys.pop(-1)
611
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
612
                if left != right:
613
                    pos -= 1
614
                    break
615
            current_prefix = current_prefix[:pos + 1]
616
        return current_prefix
617
618
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
619
        """Remove key from this node and it's children."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
620
        if not len(self._items):
621
            raise AssertionError("cant unmap in an empty InternalNode.")
622
        serialised_key = self._serialised_key(key)
623
        children = self._iter_nodes(store, key_filter=[key])
624
        serialised_key = self._serialised_key(key)
625
        if children:
626
            child = children[0]
627
        else:
628
            raise KeyError(key)
629
        self._len -= 1
630
        unmapped = child.unmap(store, key)
631
        if len(unmapped) == 0:
632
            # All child nodes are gone, remove the child:
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
633
            del self._items[serialised_key]
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
634
        else:
635
            # Stash the returned node
636
            self._items[serialised_key] = unmapped
637
        if len(self) == 1:
638
            # this node is no longer needed:
639
            return self._items.values()[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
640
        return self
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
641
        prefix, node_details = child.map(store, key, value)
642
        if len(node_details) == 1:
643
            # child may have shrunk, or might be the same.
644
            self._len = self._len - old_len + len(child)
645
            self._items[serialised_key] = child
646
            return self.unique_serialised_prefix(), [("", self)]
647
        # child has overflown - create a new intermediate node.
648
        # XXX: This is where we might want to try and expand our depth
649
        # to refer to more bytes of every child (which would give us
650
        # multiple pointers to child nodes, but less intermediate nodes)
651
        child = self._new_child(serialised_key, InternalNode)
652
        for split, node in node_details:
653
            child.add_node(split, node)
654
        self._len = self._len - old_len + len(child)
655
        return self.unique_serialised_prefix(), [("", self)]
656
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
657
658
659
class RootNode(Node):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
660
    """A root node in a CHKMap."""
661
662
    def __init__(self):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
663
        Node.__init__(self)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
664
        self._nodes = {}
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
665
        self._size = 12
666
        self.prefix_width = 0
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
667
668
    def add_child(self, name, child):
669
        """Add a child to the node.
670
671
        If the node changes, it's key is reset.
672
673
        :param name: The name of the child. A bytestring.
674
        :param child: The child, a key tuple for the childs value.
675
        """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
676
        if self._maximum_size and self._current_size() >= self._maximum_size:
677
            return False
678
        if name in self._nodes:
679
            self.remove_child(name)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
680
        self._nodes[name] = child
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
681
        self._len += 1
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
682
        self._key = None
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
683
        self._size += len(name) + len(child[0]) + 2
684
        return True
685
686
    def _current_size(self):
687
        """Answer the current serialised size of this node."""
688
        return (self._size + len(str(self._maximum_size)) + len(str(self._len))
689
            + len(str(self.prefix_width)))
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
690
691
    def deserialise(self, bytes, key):
692
        """Set the nodes value to that contained in bytes.
693
        
694
        :param bytes: The bytes of the node.
695
        :param key: The key that the serialised node has.
696
        """
697
        lines = bytes.splitlines()
698
        nodes = {}
699
        if lines[0] != 'chkroot:':
700
            raise ValueError("not a serialised root node: %r" % bytes)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
701
        maximum_size = int(lines[1])
702
        prefix_width = int(lines[2])
703
        length = int(lines[3])
704
        for line in lines[4:]:
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
705
            name, value = line.split('\x00')
706
            nodes[name] = (value,)
707
        self._nodes = nodes
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
708
        self._len = length
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
709
        self._maximum_size = maximum_size
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
710
        self._key = key
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
711
        self.prefix_width = prefix_width
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
712
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
713
    def refs(self):
714
        """Get the CHK key references this node holds."""
715
        return self._nodes.values()
716
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
717
    def remove_child(self, name):
718
        """Remove name from the node.
719
720
        If the node changes, it's key is reset.
721
722
        :param name: The name to remove from the node.
723
        """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
724
        node = self._nodes.pop(name)
725
        self._size -= 2 + len(name) + len(node[0])
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
726
        self._len -= 1
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
727
        self._key = None
728
729
    def serialise(self):
730
        """Flatten the node to a bytestring.
731
732
        :return: A bytestring.
733
        """
734
        lines = ["chkroot:\n"]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
735
        lines.append("%d\n" % self._maximum_size)
736
        lines.append("%d\n" % self.prefix_width)
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
737
        lines.append("%d\n" % self._len)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
738
        for name, child in sorted(self._nodes.items()):
739
            lines.append("%s\x00%s\n" % (name, child[0]))
740
        return "".join(lines)
741
742
743
class ValueNode(object):
744
    """A value in a CHKMap."""
745
746
    def __init__(self, value):
747
        """Create a ValueNode.
748
749
        :param value: The value of this node, must be a bytestring.
750
        """
751
        self.value = value
752
753
    @classmethod
754
    def deserialise(klass, bytes):
755
        """Get a ValueNode from a serialised bytestring.
756
        
757
        :param bytes: The bytes returned by an earlier serialisation.
758
        """
759
        if not bytes.startswith("chkvalue:\n"):
760
            raise ValueError("not a chkvalue %r" % bytes)
761
        return ValueNode(bytes[10:])
762
763
    def serialise(self):
764
        """Flatten the value to a bytestring.
765
766
        :return: A bytestring.
767
        """
768
        return "chkvalue:\n" + self.value
3735.2.16 by Robert Collins
Untested extensions to support repodetails
769
770
    def refs(self):
771
        """ValueNodes have no refs within the dict."""
772
        return []
773
774
775
def _deserialise(bytes, key):
776
    """Helper for repositorydetails - convert bytes to a node."""
777
    if bytes.startswith("chkvalue:\n"):
778
        return ValueNode.deserialise(bytes)
779
    elif bytes.startswith("chkroot:\n"):
780
        result = RootNode()
781
        result.deserialise(bytes, key)
782
        return result
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
783
    elif bytes.startswith("chkleaf:\n"):
784
        return LeafNode.deserialise(bytes, key)
785
    elif bytes.startswith("chknode:\n"):
786
        return InternalNode.deserialise(bytes, key)
3735.2.16 by Robert Collins
Untested extensions to support repodetails
787
    else:
788
        raise AssertionError("Unknown node type.")