/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
17
"""Persistent maps from tuple_of_strings->string using CHK stores.
18
19
Overview and current status:
20
21
The CHKMap class implements a dict from tuple_of_strings->string by using a trie
22
with internal nodes of 8-bit fan out; The key tuples are mapped to strings by
23
joining them by \x00, and \x00 padding shorter keys out to the length of the
24
longest key. Leaf nodes are packed as densely as possible, and internal nodes
25
are all and additional 8-bits wide leading to a sparse upper tree. 
26
27
Updates to a CHKMap are done preferentially via the apply_delta method, to
28
allow optimisation of the update operation; but individual map/unmap calls are
29
possible and supported. All changes via map/unmap are buffered in memory until
30
the _save method is called to force serialisation of the tree. apply_delta
31
performs a _save implicitly.
32
33
TODO:
34
-----
35
36
Densely packed upper nodes.
37
38
"""
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
39
3735.2.31 by Robert Collins
CHKMap.iter_changes
40
import heapq
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
41
import osutils
42
43
44
class CHKMap(object):
45
    """A persistent map from string to string backed by a CHK store."""
46
47
    def __init__(self, store, root_key):
48
        """Create a CHKMap object.
49
50
        :param store: The store the CHKMap is stored in.
51
        :param root_key: The root key of the map. None to create an empty
52
            CHKMap.
53
        """
54
        self._store = store
55
        if root_key is None:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
56
            self._root_node = LeafNode()
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
57
        else:
58
            self._root_node = root_key
59
60
    def apply_delta(self, delta):
61
        """Apply a delta to the map.
62
63
        :param delta: An iterable of old_key, new_key, new_value tuples.
64
            If new_key is not None, then new_key->new_value is inserted
65
            into the map; if old_key is not None, then the old mapping
66
            of old_key is removed.
67
        """
68
        for old, new, value in delta:
69
            if old is not None and old != new:
70
                # unmap
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
71
                self.unmap(old)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
72
        for old, new, value in delta:
73
            if new is not None:
74
                # map
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
75
                self.map(new, value)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
76
        return self._save()
77
78
    def _ensure_root(self):
79
        """Ensure that the root node is an object not a key."""
80
        if type(self._root_node) == tuple:
81
            # Demand-load the root
3735.2.31 by Robert Collins
CHKMap.iter_changes
82
            self._root_node = self._get_node(self._root_node)
83
84
    def _get_node(self, node):
85
        """Get a node.
86
87
        Node that this does not update the _items dict in objects containing a
88
        reference to this node. As such it does not prevent subsequent IO being
89
        performed.
90
        
91
        :param node: A tuple key or node object.
92
        :return: A node object.
93
        """
94
        if type(node) == tuple:
95
            bytes = self._read_bytes(node)
96
            return _deserialise(bytes, node)
97
        else:
98
            return node
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
99
100
    def _read_bytes(self, key):
101
        stream = self._store.get_record_stream([key], 'unordered', True)
102
        return stream.next().get_bytes_as('fulltext')
103
104
    @classmethod
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
105
    def from_dict(klass, store, initial_value, maximum_size=0):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
106
        """Create a CHKMap in store with initial_value as the content.
107
        
108
        :param store: The store to record initial_value in, a VersionedFiles
109
            object with 1-tuple keys supporting CHK key generation.
110
        :param initial_value: A dict to store in store. Its keys and values
111
            must be bytestrings.
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
112
        :param maximum_size: The maximum_size rule to apply to nodes. This
113
            determines the size at which no new data is added to a single node.
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
114
        :return: The root chk of te resulting CHKMap.
115
        """
116
        result = CHKMap(store, None)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
117
        result._root_node.set_maximum_size(maximum_size)
118
        delta = []
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
119
        for key, value in initial_value.items():
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
120
            delta.append((None, key, value))
121
        result.apply_delta(delta)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
122
        return result._save()
123
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
124
    def iter_changes(self, basis):
125
        """Iterate over the changes between basis and self.
126
127
        :return: An iterator of tuples: (key, old_value, new_value). Old_value
128
            is None for keys only in self; new_value is None for keys only in
129
            basis.
130
        """
3735.2.31 by Robert Collins
CHKMap.iter_changes
131
        # Overview:
132
        # Read both trees in lexographic, highest-first order.
133
        # Any identical nodes we skip
134
        # Any unique prefixes we output immediately.
135
        # values in a leaf node are treated as single-value nodes in the tree
136
        # which allows them to be not-special-cased. We know to output them
137
        # because their value is a string, not a key(tuple) or node.
138
        #
139
        # corner cases to beware of when considering this function:
140
        # *) common references are at different heights.
141
        #    consider two trees:
142
        #    {'a': LeafNode={'aaa':'foo', 'aab':'bar'}, 'b': LeafNode={'b'}}
143
        #    {'a': InternalNode={'aa':LeafNode={'aaa':'foo', 'aab':'bar'}, 'ab':LeafNode={'ab':'bar'}}
144
        #     'b': LeafNode={'b'}}
145
        #    the node with aaa/aab will only be encountered in the second tree
146
        #    after reading the 'a' subtree, but it is encountered in the first
147
        #    tree immediately. Variations on this may have read internal nodes like this.
148
        #    we want to cut the entire pending subtree when we realise we have a common node.
149
        #    For this we use a list of keys - the path to a node - and check the entire path is 
150
        #    clean as we process each item.
151
        if self._node_key(self._root_node) == self._node_key(basis._root_node):
152
            return
153
        self._ensure_root()
154
        basis._ensure_root()
155
        excluded_keys = set()
156
        self_node = self._root_node
157
        basis_node = basis._root_node
158
        # A heap, each element is prefix, node(tuple/NodeObject/string),
159
        # key_path (a list of tuples, tail-sharing down the tree.)
160
        self_pending = []
161
        basis_pending = []
162
        def process_node(prefix, node, path, a_map, pending):
163
            # take a node and expand it
164
            node = a_map._get_node(node)
165
            if type(node) == LeafNode:
166
                path = (node._key, path)
167
                for key, value in node._items.items():
168
                    heapq.heappush(pending, ('\x00'.join(key), value, path))
169
            else:
170
                # type(node) == InternalNode
171
                path = (node._key, path)
172
                for prefix, child in node._items.items():
173
                    heapq.heappush(pending, (prefix, child, path))
174
        process_node(None, self_node, None, self, self_pending)
175
        process_node(None, basis_node, None, basis, basis_pending)
176
        self_seen = set()
177
        basis_seen = set()
178
        excluded_keys = set()
179
        def check_excluded(key_path):
180
            # Note that this is N^2, it depends on us trimming trees
181
            # aggressively to not become slow.
182
            # A better implementation would probably have a reverse map
183
            # back to the children of a node, and jump straight to it when 
184
            # a common node is detected, the proceed to remove the already
185
            # pending children. bzrlib.graph has a searcher module with a
186
            # similar problem.
187
            while key_path is not None:
188
                key, key_path = key_path
189
                if key in excluded_keys:
190
                    return True
191
            return False
192
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
193
        loop_counter = 0
3735.2.31 by Robert Collins
CHKMap.iter_changes
194
        while self_pending or basis_pending:
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
195
            loop_counter += 1
3735.2.31 by Robert Collins
CHKMap.iter_changes
196
            if not self_pending:
197
                # self is exhausted: output remainder of basis
198
                for prefix, node, path in basis_pending:
199
                    if check_excluded(path):
200
                        continue
201
                    node = basis._get_node(node)
202
                    if type(node) == str:
203
                        # a value
204
                        yield (tuple(prefix.split('\x00')), node, None)
205
                    else:
206
                        # subtree - fastpath the entire thing.
207
                        for key, value in node.iteritems(basis._store):
208
                            yield (key, value, None)
209
                return
210
            elif not basis_pending:
211
                # basis is exhausted: output remainder of self.
212
                for prefix, node, path in self_pending:
213
                    if check_excluded(path):
214
                        continue
215
                    node = self._get_node(node)
216
                    excluded_keys.add(node._key)
217
                    if type(node) == str:
218
                        # a value
219
                        yield (tuple(prefix.split('\x00')), None, node)
220
                    else:
221
                        # subtree - fastpath the entire thing.
222
                        for key, value in node.iteritems(self._store):
223
                            yield (key, None, value)
224
                return
225
            else:
226
                # XXX: future optimisation - yield the smaller items
227
                # immediately rather than pushing everything on/off the
228
                # heaps. Applies to both internal nodes and leafnodes.
229
                if self_pending[0][0] < basis_pending[0][0]:
230
                    # expand self
231
                    prefix, node, path = heapq.heappop(self_pending)
232
                    if check_excluded(path):
233
                        continue
234
                    if type(node) == str:
235
                        # a value
236
                        yield (tuple(prefix.split('\x00')), None, node)
237
                    else:
238
                        process_node(prefix, node, path, self, self_pending)
239
                        continue
240
                elif self_pending[0][0] > basis_pending[0][0]:
241
                    # expand basis
242
                    prefix, node, path = heapq.heappop(basis_pending)
243
                    if check_excluded(path):
244
                        continue
245
                    if type(node) == str:
246
                        # a value
247
                        yield (tuple(prefix.split('\x00')), node, None)
248
                    else:
249
                        process_node(prefix, node, path, basis, basis_pending)
250
                        continue
251
                else:
252
                    # common prefix: possibly expand both
253
                    if type(self_pending[0][1]) != str:
254
                        # process next self
255
                        read_self = True
256
                    else:
257
                        read_self = False
258
                    if type(basis_pending[0][1]) != str:
259
                        # process next basis
260
                        read_basis = True
261
                    else:
262
                        read_basis = False
263
                    if not read_self and not read_basis:
264
                        # compare a common value
265
                        self_details = heapq.heappop(self_pending)
266
                        basis_details = heapq.heappop(basis_pending)
267
                        if self_details[1] != basis_details[1]:
268
                            yield (tuple(self_details[0].split('\x00')),
269
                                basis_details[1], self_details[1])
270
                        continue
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
271
                    # At least one side wasn't a string.
272
                    if (self._node_key(self_pending[0][1]) ==
273
                        self._node_key(basis_pending[0][1])):
274
                        # Identical pointers, skip (and don't bother adding to
275
                        # excluded, it won't turn up again.
276
                        heapq.heappop(self_pending)
277
                        heapq.heappop(basis_pending)
278
                        continue
279
                    # Now we need to expand this node before we can continue
3735.2.31 by Robert Collins
CHKMap.iter_changes
280
                    if read_self:
281
                        prefix, node, path = heapq.heappop(self_pending)
282
                        if check_excluded(path):
283
                            continue
284
                        process_node(prefix, node, path, self, self_pending)
285
                    if read_basis:
286
                        prefix, node, path = heapq.heappop(basis_pending)
287
                        if check_excluded(path):
288
                            continue
289
                        process_node(prefix, node, path, basis, basis_pending)
3735.2.32 by Robert Collins
Activate test for common node skipping. - 50 times performance improvement.
290
        # print loop_counter
3735.2.30 by Robert Collins
Start iter_changes between CHKMap instances.
291
3735.2.9 by Robert Collins
Get a working chk_map using inventory implementation bootstrapped.
292
    def iteritems(self, key_filter=None):
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
293
        """Iterate over the entire CHKMap's contents."""
294
        self._ensure_root()
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
295
        return self._root_node.iteritems(self._store, key_filter=key_filter)
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
296
3735.2.12 by Robert Collins
Implement commit-via-deltas for split inventory repositories.
297
    def key(self):
298
        """Return the key for this map."""
299
        if isinstance(self._root_node, tuple):
300
            return self._root_node
301
        else:
302
            return self._root_node._key
303
3735.2.17 by Robert Collins
Cache node length to avoid full iteration on __len__ calls.
304
    def __len__(self):
305
        self._ensure_root()
306
        return len(self._root_node)
307
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
308
    def map(self, key, value):
309
        """Map a key tuple to value."""
310
        # Need a root object.
311
        self._ensure_root()
312
        prefix, node_details = self._root_node.map(self._store, key, value)
313
        if len(node_details) == 1:
314
            self._root_node = node_details[0][1]
315
        else:
316
            self._root_node = InternalNode()
317
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
318
            self._root_node._key_width = node_details[0][1]._key_width
319
            for split, node in node_details:
320
                self._root_node.add_node(split, node)
321
3735.2.31 by Robert Collins
CHKMap.iter_changes
322
    def _node_key(self, node):
323
        """Get the key for a node whether its a tuple o r node."""
324
        if type(node) == tuple:
325
            return node
326
        else:
327
            return node._key
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
328
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
329
    def unmap(self, key):
330
        """remove key from the map."""
331
        self._ensure_root()
332
        self._root_node.unmap(self._store, key)
333
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
334
    def _save(self):
335
        """Save the map completely.
336
337
        :return: The key of the root node.
338
        """
339
        if type(self._root_node) == tuple:
340
            # Already saved.
341
            return self._root_node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
342
        keys = list(self._root_node.serialise(self._store))
343
        return keys[-1]
3735.2.8 by Robert Collins
New chk_map module for use in tree based inventory storage.
344
345
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
346
class Node(object):
347
    """Base class defining the protocol for CHK Map nodes."""
348
349
    def __init__(self, key_width=1):
350
        """Create a node.
351
352
        :param key_width: The width of keys for this node.
353
        """
354
        self._key = None
355
        # Current number of elements
356
        self._len = 0
357
        self._maximum_size = 0
358
        self._key_width = 1
359
        # current size in bytes
360
        self._size = 0
361
        # The pointers/values this node has - meaning defined by child classes.
362
        self._items = {}
363
364
    def __len__(self):
365
        return self._len
366
367
    @property
368
    def maximum_size(self):
369
        """What is the upper limit for adding references to a node."""
370
        return self._maximum_size
371
372
    def set_maximum_size(self, new_size):
373
        """Set the size threshold for nodes.
374
375
        :param new_size: The size at which no data is added to a node. 0 for
376
            unlimited.
377
        """
378
        self._maximum_size = new_size
379
380
381
class LeafNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
382
    """A node containing actual key:value pairs.
383
    
384
    :ivar _items: A dict of key->value items. The key is in tuple form.
385
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
386
387
    def __init__(self):
388
        Node.__init__(self)
389
        # The size of a leaf node with default values and no children.
390
        self._size = 12
391
392
    def _current_size(self):
393
        """Answer the current serialised size of this node."""
394
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
395
            len(str(self._maximum_size)))
396
397
    @classmethod
398
    def deserialise(klass, bytes, key):
399
        """Deserialise bytes, with key key, into a LeafNode.
400
401
        :param bytes: The bytes of the node.
402
        :param key: The key that the serialised node has.
403
        """
404
        result = LeafNode()
405
        lines = bytes.splitlines()
406
        items = {}
407
        if lines[0] != 'chkleaf:':
408
            raise ValueError("not a serialised leaf node: %r" % bytes)
409
        maximum_size = int(lines[1])
410
        width = int(lines[2])
411
        length = int(lines[3])
412
        for line in lines[4:]:
3735.2.25 by Robert Collins
CHKInventory core tests passing.
413
            elements = line.split('\x00', width)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
414
            items[tuple(elements[:-1])] = elements[-1]
415
        if len(items) != length:
416
            raise AssertionError("item count mismatch")
417
        result._items = items
418
        result._len = length
419
        result._maximum_size = maximum_size
420
        result._key = key
421
        result._key_width = width
422
        result._size = len(bytes)
423
        return result
424
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
425
    def iteritems(self, store, key_filter=None):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
426
        if key_filter is not None:
427
            for item in self._items.iteritems():
428
                if item[0] in key_filter:
429
                    yield item
430
        else:
431
            for item in self._items.iteritems():
432
                yield item
433
434
    def key(self):
435
        return self._key
436
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
437
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
438
        """Map key to value."""
439
        if key in self._items:
440
            self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
441
            self._len -= 1
442
        self._items[key] = value
443
        self._size += 2 + len('\x00'.join(key)) + len(value)
444
        self._len += 1
445
        self._key = None
446
        if (self._maximum_size and self._current_size() > self._maximum_size and
447
            self._len > 1):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
448
            common_prefix = self.unique_serialised_prefix()
449
            split_at = len(common_prefix) + 1
450
            result = {}
451
            for key, value in self._items.iteritems():
452
                serialised_key = self._serialised_key(key)
453
                prefix = serialised_key[:split_at]
454
                if prefix not in result:
455
                    node = LeafNode()
456
                    node.set_maximum_size(self._maximum_size)
457
                    node._key_width = self._key_width
458
                    result[prefix] = node
459
                else:
460
                    node = result[prefix]
461
                node.map(store, key, value)
462
            return common_prefix, result.items()
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
463
        else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
464
            return self.unique_serialised_prefix(), [("", self)]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
465
466
    def serialise(self, store):
467
        """Serialise the tree to store.
468
469
        :param store: A VersionedFiles honouring the CHK extensions.
470
        :return: An iterable of the keys inserted by this operation.
471
        """
472
        lines = ["chkleaf:\n"]
473
        lines.append("%d\n" % self._maximum_size)
474
        lines.append("%d\n" % self._key_width)
475
        lines.append("%d\n" % self._len)
476
        for key, value in sorted(self._items.items()):
477
            lines.append("%s\x00%s\n" % ('\x00'.join(key), value))
478
        sha1, _, _ = store.add_lines((None,), (), lines)
479
        self._key = ("sha1:" + sha1,)
480
        return [self._key]
481
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
482
    def _serialised_key(self, key):
483
        """Return the serialised key for key in this node."""
484
        return '\x00'.join(key)
485
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
486
    def refs(self):
487
        """Return the references to other CHK's held by this node."""
488
        return []
489
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
490
    def unique_serialised_prefix(self):
491
        """Return the unique key prefix for this node.
492
493
        :return: A bytestring of the longest serialised key prefix that is
494
            unique within this node.
495
        """
496
        # may want to cache this eventually :- but wait for enough
497
        # functionality to profile.
498
        keys = list(self._items.keys())
499
        if not keys:
500
            return ""
501
        current_prefix = self._serialised_key(keys.pop(-1))
502
        while current_prefix and keys:
503
            next_key = self._serialised_key(keys.pop(-1))
504
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
505
                if left != right:
506
                    pos -= 1
507
                    break
508
            current_prefix = current_prefix[:pos + 1]
509
        return current_prefix
510
511
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
512
        """Unmap key from the node."""
513
        self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
514
        self._len -= 1
515
        del self._items[key]
516
        self._key = None
517
        return self
518
519
520
class InternalNode(Node):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
521
    """A node that contains references to other nodes.
522
    
523
    An InternalNode is responsible for mapping serialised key prefixes to child
524
    nodes. It is greedy - it will defer splitting itself as long as possible.
525
    """
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
526
527
    def __init__(self):
528
        Node.__init__(self)
529
        # The size of an internalnode with default values and no children.
530
        # self._size = 12
531
        # How many octets key prefixes within this node are.
532
        self._node_width = 0
533
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
534
    def add_node(self, prefix, node):
535
        """Add a child node with prefix prefix, and node node.
536
537
        :param prefix: The serialised key prefix for node.
538
        :param node: The node being added.
539
        """
540
        self._len += len(node)
541
        if not len(self._items):
542
            self._node_width = len(prefix)
543
        self._items[prefix] = node
544
        self._key = None
545
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
546
    def _current_size(self):
547
        """Answer the current serialised size of this node."""
548
        return (self._size + len(str(self._len)) + len(str(self._key_width)) +
549
            len(str(self._maximum_size)))
550
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
551
    @classmethod
552
    def deserialise(klass, bytes, key):
3735.2.25 by Robert Collins
CHKInventory core tests passing.
553
        """Deserialise bytes to an InternalNode, with key key.
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
554
555
        :param bytes: The bytes of the node.
556
        :param key: The key that the serialised node has.
557
        :return: An InternalNode instance.
558
        """
559
        result = InternalNode()
560
        lines = bytes.splitlines()
561
        items = {}
562
        if lines[0] != 'chknode:':
563
            raise ValueError("not a serialised internal node: %r" % bytes)
564
        maximum_size = int(lines[1])
565
        width = int(lines[2])
566
        length = int(lines[3])
567
        for line in lines[4:]:
568
            prefix, flat_key = line.rsplit('\x00', 1)
569
            items[prefix] = (flat_key,)
570
        result._items = items
571
        result._len = length
572
        result._maximum_size = maximum_size
573
        result._key = key
574
        result._key_width = width
575
        result._size = len(bytes)
576
        result._node_width = len(prefix)
577
        return result
578
579
    def iteritems(self, store, key_filter=None):
580
        for node in self._iter_nodes(store, key_filter=key_filter):
581
            for item in node.iteritems(store, key_filter=key_filter):
582
                yield item
583
584
    def _iter_nodes(self, store, key_filter=None):
585
        """Iterate over node objects which match key_filter.
586
587
        :param store: A store to use for accessing content.
588
        :param key_filter: A key filter to filter nodes. Only nodes that might
589
            contain a key in key_filter will be returned.
590
        :return: An iterable of nodes.
591
        """
592
        nodes = []
3735.2.31 by Robert Collins
CHKMap.iter_changes
593
        keys = {}
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
594
        if key_filter is None:
3735.2.31 by Robert Collins
CHKMap.iter_changes
595
            for prefix, node in self._items.iteritems():
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
596
                if type(node) == tuple:
3735.2.31 by Robert Collins
CHKMap.iter_changes
597
                    keys[node] = prefix
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
598
                else:
599
                    nodes.append(node)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
600
        else:
601
            serialised_filter = set([self._serialised_key(key) for key in
602
                key_filter])
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
603
            for prefix, node in self._items.iteritems():
604
                if prefix in serialised_filter:
605
                    if type(node) == tuple:
3735.2.31 by Robert Collins
CHKMap.iter_changes
606
                        keys[node] = prefix
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
607
                    else:
608
                        nodes.append(node)
609
        if keys:
610
            # demand load some pages.
611
            stream = store.get_record_stream(keys, 'unordered', True)
612
            for record in stream:
613
                node = _deserialise(record.get_bytes_as('fulltext'), record.key)
614
                nodes.append(node)
3735.2.31 by Robert Collins
CHKMap.iter_changes
615
                self._items[keys[record.key]] = node
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
616
        return nodes
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
617
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
618
    def map(self, store, key, value):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
619
        """Map key to value."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
620
        if not len(self._items):
621
            raise AssertionError("cant map in an empty InternalNode.")
622
        children = self._iter_nodes(store, key_filter=[key])
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
623
        serialised_key = self._serialised_key(key)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
624
        if children:
625
            child = children[0]
626
        else:
627
            # new child needed:
628
            child = self._new_child(serialised_key, LeafNode)
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
629
        old_len = len(child)
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
630
        prefix, node_details = child.map(store, key, value)
631
        if len(node_details) == 1:
632
            # child may have shrunk, or might be the same.
633
            self._len = self._len - old_len + len(child)
634
            self._items[serialised_key] = child
3735.2.29 by Robert Collins
Untested code is broken code.
635
            self._key = None
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
636
            return self.unique_serialised_prefix(), [("", self)]
637
        # child has overflown - create a new intermediate node.
638
        # XXX: This is where we might want to try and expand our depth
639
        # to refer to more bytes of every child (which would give us
640
        # multiple pointers to child nodes, but less intermediate nodes)
641
        child = self._new_child(serialised_key, InternalNode)
642
        for split, node in node_details:
643
            child.add_node(split, node)
644
        self._len = self._len - old_len + len(child)
3735.2.29 by Robert Collins
Untested code is broken code.
645
        self._key = None
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
646
        return self.unique_serialised_prefix(), [("", self)]
647
648
    def _new_child(self, serialised_key, klass):
649
        """Create a new child node of type klass."""
650
        child = klass()
651
        child.set_maximum_size(self._maximum_size)
652
        child._key_width = self._key_width
653
        self._items[serialised_key] = child
654
        return child
655
656
    def serialise(self, store):
657
        """Serialise the node to store.
658
659
        :param store: A VersionedFiles honouring the CHK extensions.
660
        :return: An iterable of the keys inserted by this operation.
661
        """
662
        for node in self._items.itervalues():
663
            if type(node) == tuple:
664
                # Never deserialised.
665
                continue
666
            if node._key is not None:
667
                # Never altered
668
                continue
669
            for key in node.serialise(store):
670
                yield key
671
        lines = ["chknode:\n"]
672
        lines.append("%d\n" % self._maximum_size)
673
        lines.append("%d\n" % self._key_width)
674
        lines.append("%d\n" % self._len)
675
        for prefix, node in sorted(self._items.items()):
676
            if type(node) == tuple:
677
                key = node[0]
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
678
            else:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
679
                key = node._key[0]
680
            lines.append("%s\x00%s\n" % (prefix, key))
681
        sha1, _, _ = store.add_lines((None,), (), lines)
682
        self._key = ("sha1:" + sha1,)
683
        yield self._key
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
684
685
    def _serialised_key(self, key):
686
        """Return the serialised key for key in this node."""
687
        return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
688
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
689
    def _split(self, offset):
690
        """Split this node into smaller nodes starting at offset.
691
692
        :param offset: The offset to start the new child nodes at.
693
        :return: An iterable of (prefix, node) tuples. prefix is a byte
694
            prefix for reaching node.
695
        """
696
        if offset >= self._node_width:
697
            for node in self._items.values():
698
                for result in node._split(offset):
699
                    yield result
700
            return
701
        for key, node in self._items.items():
702
            pass
703
3735.2.26 by Robert Collins
CHKInventory migrated to new CHKMap code.
704
    def refs(self):
705
        """Return the references to other CHK's held by this node."""
706
        if self._key is None:
707
            raise AssertionError("unserialised nodes have no refs.")
708
        refs = []
709
        for value in self._items.itervalues():
710
            if type(value) == tuple:
711
                refs.append(value)
712
            else:
713
                refs.append(value.key())
714
        return refs
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
715
716
    def unique_serialised_prefix(self):
717
        """Return the unique key prefix for this node.
718
719
        :return: A bytestring of the longest serialised key prefix that is
720
            unique within this node.
721
        """
722
        # may want to cache this eventually :- but wait for enough
723
        # functionality to profile.
724
        keys = list(self._items.keys())
725
        if not keys:
726
            return ""
727
        current_prefix = keys.pop(-1)
728
        while current_prefix and keys:
729
            next_key = keys.pop(-1)
730
            for pos, (left, right) in enumerate(zip(current_prefix, next_key)):
731
                if left != right:
732
                    pos -= 1
733
                    break
734
            current_prefix = current_prefix[:pos + 1]
735
        return current_prefix
736
737
    def unmap(self, store, key):
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
738
        """Remove key from this node and it's children."""
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
739
        if not len(self._items):
740
            raise AssertionError("cant unmap in an empty InternalNode.")
741
        serialised_key = self._serialised_key(key)
742
        children = self._iter_nodes(store, key_filter=[key])
743
        serialised_key = self._serialised_key(key)
744
        if children:
745
            child = children[0]
746
        else:
747
            raise KeyError(key)
748
        self._len -= 1
749
        unmapped = child.unmap(store, key)
750
        if len(unmapped) == 0:
751
            # All child nodes are gone, remove the child:
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
752
            del self._items[serialised_key]
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
753
        else:
754
            # Stash the returned node
755
            self._items[serialised_key] = unmapped
3735.2.23 by Robert Collins
Test unmapping with one child left but multiple keys.
756
        if len(self._items) == 1:
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
757
            # this node is no longer needed:
758
            return self._items.values()[0]
3735.2.29 by Robert Collins
Untested code is broken code.
759
        self._key = None
3735.2.18 by Robert Collins
Partial multi-layer chk dictionary trees.
760
        return self
761
762
3735.2.16 by Robert Collins
Untested extensions to support repodetails
763
def _deserialise(bytes, key):
764
    """Helper for repositorydetails - convert bytes to a node."""
3735.2.24 by Robert Collins
test_chk_map tests all passing.
765
    if bytes.startswith("chkleaf:\n"):
3735.2.21 by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux.
766
        return LeafNode.deserialise(bytes, key)
767
    elif bytes.startswith("chknode:\n"):
768
        return InternalNode.deserialise(bytes, key)
3735.2.16 by Robert Collins
Untested extensions to support repodetails
769
    else:
770
        raise AssertionError("Unknown node type.")