bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
1 |
# Copyright (C) 2008 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
17 |
"""Persistent maps from tuple_of_strings->string using CHK stores.
|
18 |
||
19 |
Overview and current status:
|
|
20 |
||
21 |
The CHKMap class implements a dict from tuple_of_strings->string by using a trie
|
|
22 |
with internal nodes of 8-bit fan out; The key tuples are mapped to strings by
|
|
23 |
joining them by \x00, and \x00 padding shorter keys out to the length of the
|
|
24 |
longest key. Leaf nodes are packed as densely as possible, and internal nodes
|
|
25 |
are all and additional 8-bits wide leading to a sparse upper tree.
|
|
26 |
||
27 |
Updates to a CHKMap are done preferentially via the apply_delta method, to
|
|
28 |
allow optimisation of the update operation; but individual map/unmap calls are
|
|
29 |
possible and supported. All changes via map/unmap are buffered in memory until
|
|
30 |
the _save method is called to force serialisation of the tree. apply_delta
|
|
31 |
performs a _save implicitly.
|
|
32 |
||
33 |
TODO:
|
|
34 |
-----
|
|
35 |
||
36 |
Densely packed upper nodes.
|
|
37 |
||
38 |
"""
|
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
39 |
|
40 |
import osutils |
|
41 |
||
42 |
||
43 |
class CHKMap(object): |
|
44 |
"""A persistent map from string to string backed by a CHK store.""" |
|
45 |
||
46 |
def __init__(self, store, root_key): |
|
47 |
"""Create a CHKMap object. |
|
48 |
||
49 |
:param store: The store the CHKMap is stored in.
|
|
50 |
:param root_key: The root key of the map. None to create an empty
|
|
51 |
CHKMap.
|
|
52 |
"""
|
|
53 |
self._store = store |
|
54 |
if root_key is None: |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
55 |
self._root_node = LeafNode() |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
56 |
else: |
57 |
self._root_node = root_key |
|
58 |
||
59 |
def apply_delta(self, delta): |
|
60 |
"""Apply a delta to the map. |
|
61 |
||
62 |
:param delta: An iterable of old_key, new_key, new_value tuples.
|
|
63 |
If new_key is not None, then new_key->new_value is inserted
|
|
64 |
into the map; if old_key is not None, then the old mapping
|
|
65 |
of old_key is removed.
|
|
66 |
"""
|
|
67 |
for old, new, value in delta: |
|
68 |
if old is not None and old != new: |
|
69 |
# unmap
|
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
70 |
self.unmap(old) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
71 |
for old, new, value in delta: |
72 |
if new is not None: |
|
73 |
# map
|
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
74 |
self.map(new, value) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
75 |
return self._save() |
76 |
||
77 |
def _ensure_root(self): |
|
78 |
"""Ensure that the root node is an object not a key.""" |
|
79 |
if type(self._root_node) == tuple: |
|
80 |
# Demand-load the root
|
|
81 |
bytes = self._read_bytes(self._root_node) |
|
82 |
root_key = self._root_node |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
83 |
self._root_node = _deserialise(bytes, root_key) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
84 |
|
85 |
def _read_bytes(self, key): |
|
86 |
stream = self._store.get_record_stream([key], 'unordered', True) |
|
87 |
return stream.next().get_bytes_as('fulltext') |
|
88 |
||
89 |
@classmethod
|
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
90 |
def from_dict(klass, store, initial_value, maximum_size=0): |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
91 |
"""Create a CHKMap in store with initial_value as the content. |
92 |
|
|
93 |
:param store: The store to record initial_value in, a VersionedFiles
|
|
94 |
object with 1-tuple keys supporting CHK key generation.
|
|
95 |
:param initial_value: A dict to store in store. Its keys and values
|
|
96 |
must be bytestrings.
|
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
97 |
:param maximum_size: The maximum_size rule to apply to nodes. This
|
98 |
determines the size at which no new data is added to a single node.
|
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
99 |
:return: The root chk of te resulting CHKMap.
|
100 |
"""
|
|
101 |
result = CHKMap(store, None) |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
102 |
result._root_node.set_maximum_size(maximum_size) |
103 |
delta = [] |
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
104 |
for key, value in initial_value.items(): |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
105 |
delta.append((None, key, value)) |
106 |
result.apply_delta(delta) |
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
107 |
return result._save() |
108 |
||
|
3735.2.30
by Robert Collins
Start iter_changes between CHKMap instances. |
109 |
def iter_changes(self, basis): |
110 |
"""Iterate over the changes between basis and self. |
|
111 |
||
112 |
:return: An iterator of tuples: (key, old_value, new_value). Old_value
|
|
113 |
is None for keys only in self; new_value is None for keys only in
|
|
114 |
basis.
|
|
115 |
"""
|
|
116 |
# Cheap but let tests pass
|
|
117 |
new = dict(self.iteritems()) |
|
118 |
old = dict(basis.iteritems()) |
|
119 |
new_keys = set(new) |
|
120 |
old_keys = set(old) |
|
121 |
result = [] |
|
122 |
for key in new_keys - old_keys: |
|
123 |
result.append((key, None, new[key])) |
|
124 |
for key in old_keys - new_keys: |
|
125 |
result.append((key, old[key], None)) |
|
126 |
for key in old_keys.intersection(new_keys): |
|
127 |
if new[key] != old[key]: |
|
128 |
result.append((key, old[key], new[key])) |
|
129 |
return result |
|
130 |
||
|
3735.2.9
by Robert Collins
Get a working chk_map using inventory implementation bootstrapped. |
131 |
def iteritems(self, key_filter=None): |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
132 |
"""Iterate over the entire CHKMap's contents.""" |
133 |
self._ensure_root() |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
134 |
return self._root_node.iteritems(self._store, key_filter=key_filter) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
135 |
|
|
3735.2.12
by Robert Collins
Implement commit-via-deltas for split inventory repositories. |
136 |
def key(self): |
137 |
"""Return the key for this map.""" |
|
138 |
if isinstance(self._root_node, tuple): |
|
139 |
return self._root_node |
|
140 |
else: |
|
141 |
return self._root_node._key |
|
142 |
||
|
3735.2.17
by Robert Collins
Cache node length to avoid full iteration on __len__ calls. |
143 |
def __len__(self): |
144 |
self._ensure_root() |
|
145 |
return len(self._root_node) |
|
146 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
147 |
def map(self, key, value): |
148 |
"""Map a key tuple to value.""" |
|
149 |
# Need a root object.
|
|
150 |
self._ensure_root() |
|
151 |
prefix, node_details = self._root_node.map(self._store, key, value) |
|
152 |
if len(node_details) == 1: |
|
153 |
self._root_node = node_details[0][1] |
|
154 |
else: |
|
155 |
self._root_node = InternalNode() |
|
156 |
self._root_node.set_maximum_size(node_details[0][1].maximum_size) |
|
157 |
self._root_node._key_width = node_details[0][1]._key_width |
|
158 |
for split, node in node_details: |
|
159 |
self._root_node.add_node(split, node) |
|
160 |
||
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
161 |
def _map(self, key, value): |
162 |
"""Map key to value.""" |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
163 |
# Ne
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
164 |
self._ensure_root() |
165 |
# Store the value
|
|
166 |
bytes = ValueNode(value).serialise() |
|
|
3735.2.15
by Robert Collins
More direct implementation of fetch between different serializers. |
167 |
# Experimental code to probe for keys rather than just adding; its not
|
168 |
# clear if it is an improvement.
|
|
169 |
#chk = ("sha1:%s" % osutils.sha_string(bytes),)
|
|
170 |
#if not self._store.get_parent_map([key]):
|
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
171 |
sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes)) |
|
3735.2.15
by Robert Collins
More direct implementation of fetch between different serializers. |
172 |
chk = ("sha1:" + sha1,) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
173 |
# And link into the root
|
|
3735.2.15
by Robert Collins
More direct implementation of fetch between different serializers. |
174 |
self._root_node.add_child(key, chk) |
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
175 |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
176 |
def unmap(self, key): |
177 |
"""remove key from the map.""" |
|
178 |
self._ensure_root() |
|
179 |
self._root_node.unmap(self._store, key) |
|
180 |
||
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
181 |
def _unmap(self, key): |
182 |
"""remove key from the map.""" |
|
183 |
self._ensure_root() |
|
184 |
self._root_node.remove_child(key) |
|
185 |
||
186 |
def _save(self): |
|
187 |
"""Save the map completely. |
|
188 |
||
189 |
:return: The key of the root node.
|
|
190 |
"""
|
|
191 |
if type(self._root_node) == tuple: |
|
192 |
# Already saved.
|
|
193 |
return self._root_node |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
194 |
keys = list(self._root_node.serialise(self._store)) |
195 |
return keys[-1] |
|
|
3735.2.8
by Robert Collins
New chk_map module for use in tree based inventory storage. |
196 |
|
197 |
||
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
198 |
class Node(object): |
199 |
"""Base class defining the protocol for CHK Map nodes.""" |
|
200 |
||
201 |
def __init__(self, key_width=1): |
|
202 |
"""Create a node. |
|
203 |
||
204 |
:param key_width: The width of keys for this node.
|
|
205 |
"""
|
|
206 |
self._key = None |
|
207 |
# Current number of elements
|
|
208 |
self._len = 0 |
|
209 |
self._maximum_size = 0 |
|
210 |
self._key_width = 1 |
|
211 |
# current size in bytes
|
|
212 |
self._size = 0 |
|
213 |
# The pointers/values this node has - meaning defined by child classes.
|
|
214 |
self._items = {} |
|
215 |
||
216 |
def __len__(self): |
|
217 |
return self._len |
|
218 |
||
219 |
@property
|
|
220 |
def maximum_size(self): |
|
221 |
"""What is the upper limit for adding references to a node.""" |
|
222 |
return self._maximum_size |
|
223 |
||
224 |
def set_maximum_size(self, new_size): |
|
225 |
"""Set the size threshold for nodes. |
|
226 |
||
227 |
:param new_size: The size at which no data is added to a node. 0 for
|
|
228 |
unlimited.
|
|
229 |
"""
|
|
230 |
self._maximum_size = new_size |
|
231 |
||
232 |
||
233 |
class LeafNode(Node): |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
234 |
"""A node containing actual key:value pairs. |
235 |
|
|
236 |
:ivar _items: A dict of key->value items. The key is in tuple form.
|
|
237 |
"""
|
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
238 |
|
239 |
def __init__(self): |
|
240 |
Node.__init__(self) |
|
241 |
# The size of a leaf node with default values and no children.
|
|
242 |
self._size = 12 |
|
243 |
||
244 |
def _current_size(self): |
|
245 |
"""Answer the current serialised size of this node.""" |
|
246 |
return (self._size + len(str(self._len)) + len(str(self._key_width)) + |
|
247 |
len(str(self._maximum_size))) |
|
248 |
||
249 |
@classmethod
|
|
250 |
def deserialise(klass, bytes, key): |
|
251 |
"""Deserialise bytes, with key key, into a LeafNode. |
|
252 |
||
253 |
:param bytes: The bytes of the node.
|
|
254 |
:param key: The key that the serialised node has.
|
|
255 |
"""
|
|
256 |
result = LeafNode() |
|
257 |
lines = bytes.splitlines() |
|
258 |
items = {} |
|
259 |
if lines[0] != 'chkleaf:': |
|
260 |
raise ValueError("not a serialised leaf node: %r" % bytes) |
|
261 |
maximum_size = int(lines[1]) |
|
262 |
width = int(lines[2]) |
|
263 |
length = int(lines[3]) |
|
264 |
for line in lines[4:]: |
|
|
3735.2.25
by Robert Collins
CHKInventory core tests passing. |
265 |
elements = line.split('\x00', width) |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
266 |
items[tuple(elements[:-1])] = elements[-1] |
267 |
if len(items) != length: |
|
268 |
raise AssertionError("item count mismatch") |
|
269 |
result._items = items |
|
270 |
result._len = length |
|
271 |
result._maximum_size = maximum_size |
|
272 |
result._key = key |
|
273 |
result._key_width = width |
|
274 |
result._size = len(bytes) |
|
275 |
return result |
|
276 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
277 |
def iteritems(self, store, key_filter=None): |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
278 |
if key_filter is not None: |
279 |
for item in self._items.iteritems(): |
|
280 |
if item[0] in key_filter: |
|
281 |
yield item |
|
282 |
else: |
|
283 |
for item in self._items.iteritems(): |
|
284 |
yield item |
|
285 |
||
286 |
def key(self): |
|
287 |
return self._key |
|
288 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
289 |
def map(self, store, key, value): |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
290 |
"""Map key to value.""" |
291 |
if key in self._items: |
|
292 |
self._size -= 2 + len('\x00'.join(key)) + len(self._items[key]) |
|
293 |
self._len -= 1 |
|
294 |
self._items[key] = value |
|
295 |
self._size += 2 + len('\x00'.join(key)) + len(value) |
|
296 |
self._len += 1 |
|
297 |
self._key = None |
|
298 |
if (self._maximum_size and self._current_size() > self._maximum_size and |
|
299 |
self._len > 1): |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
300 |
common_prefix = self.unique_serialised_prefix() |
301 |
split_at = len(common_prefix) + 1 |
|
302 |
result = {} |
|
303 |
for key, value in self._items.iteritems(): |
|
304 |
serialised_key = self._serialised_key(key) |
|
305 |
prefix = serialised_key[:split_at] |
|
306 |
if prefix not in result: |
|
307 |
node = LeafNode() |
|
308 |
node.set_maximum_size(self._maximum_size) |
|
309 |
node._key_width = self._key_width |
|
310 |
result[prefix] = node |
|
311 |
else: |
|
312 |
node = result[prefix] |
|
313 |
node.map(store, key, value) |
|
314 |
return common_prefix, result.items() |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
315 |
else: |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
316 |
return self.unique_serialised_prefix(), [("", self)] |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
317 |
|
318 |
def serialise(self, store): |
|
319 |
"""Serialise the tree to store. |
|
320 |
||
321 |
:param store: A VersionedFiles honouring the CHK extensions.
|
|
322 |
:return: An iterable of the keys inserted by this operation.
|
|
323 |
"""
|
|
324 |
lines = ["chkleaf:\n"] |
|
325 |
lines.append("%d\n" % self._maximum_size) |
|
326 |
lines.append("%d\n" % self._key_width) |
|
327 |
lines.append("%d\n" % self._len) |
|
328 |
for key, value in sorted(self._items.items()): |
|
329 |
lines.append("%s\x00%s\n" % ('\x00'.join(key), value)) |
|
330 |
sha1, _, _ = store.add_lines((None,), (), lines) |
|
331 |
self._key = ("sha1:" + sha1,) |
|
332 |
return [self._key] |
|
333 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
334 |
def _serialised_key(self, key): |
335 |
"""Return the serialised key for key in this node.""" |
|
336 |
return '\x00'.join(key) |
|
337 |
||
|
3735.2.26
by Robert Collins
CHKInventory migrated to new CHKMap code. |
338 |
def refs(self): |
339 |
"""Return the references to other CHK's held by this node.""" |
|
340 |
return [] |
|
341 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
342 |
def unique_serialised_prefix(self): |
343 |
"""Return the unique key prefix for this node. |
|
344 |
||
345 |
:return: A bytestring of the longest serialised key prefix that is
|
|
346 |
unique within this node.
|
|
347 |
"""
|
|
348 |
# may want to cache this eventually :- but wait for enough
|
|
349 |
# functionality to profile.
|
|
350 |
keys = list(self._items.keys()) |
|
351 |
if not keys: |
|
352 |
return "" |
|
353 |
current_prefix = self._serialised_key(keys.pop(-1)) |
|
354 |
while current_prefix and keys: |
|
355 |
next_key = self._serialised_key(keys.pop(-1)) |
|
356 |
for pos, (left, right) in enumerate(zip(current_prefix, next_key)): |
|
357 |
if left != right: |
|
358 |
pos -= 1 |
|
359 |
break
|
|
360 |
current_prefix = current_prefix[:pos + 1] |
|
361 |
return current_prefix |
|
362 |
||
363 |
def unmap(self, store, key): |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
364 |
"""Unmap key from the node.""" |
365 |
self._size -= 2 + len('\x00'.join(key)) + len(self._items[key]) |
|
366 |
self._len -= 1 |
|
367 |
del self._items[key] |
|
368 |
self._key = None |
|
369 |
return self |
|
370 |
||
371 |
||
372 |
class InternalNode(Node): |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
373 |
"""A node that contains references to other nodes. |
374 |
|
|
375 |
An InternalNode is responsible for mapping serialised key prefixes to child
|
|
376 |
nodes. It is greedy - it will defer splitting itself as long as possible.
|
|
377 |
"""
|
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
378 |
|
379 |
def __init__(self): |
|
380 |
Node.__init__(self) |
|
381 |
# The size of an internalnode with default values and no children.
|
|
382 |
# self._size = 12
|
|
383 |
# How many octets key prefixes within this node are.
|
|
384 |
self._node_width = 0 |
|
385 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
386 |
def add_node(self, prefix, node): |
387 |
"""Add a child node with prefix prefix, and node node. |
|
388 |
||
389 |
:param prefix: The serialised key prefix for node.
|
|
390 |
:param node: The node being added.
|
|
391 |
"""
|
|
392 |
self._len += len(node) |
|
393 |
if not len(self._items): |
|
394 |
self._node_width = len(prefix) |
|
395 |
self._items[prefix] = node |
|
396 |
self._key = None |
|
397 |
||
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
398 |
def _current_size(self): |
399 |
"""Answer the current serialised size of this node.""" |
|
400 |
return (self._size + len(str(self._len)) + len(str(self._key_width)) + |
|
401 |
len(str(self._maximum_size))) |
|
402 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
403 |
@classmethod
|
404 |
def deserialise(klass, bytes, key): |
|
|
3735.2.25
by Robert Collins
CHKInventory core tests passing. |
405 |
"""Deserialise bytes to an InternalNode, with key key. |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
406 |
|
407 |
:param bytes: The bytes of the node.
|
|
408 |
:param key: The key that the serialised node has.
|
|
409 |
:return: An InternalNode instance.
|
|
410 |
"""
|
|
411 |
result = InternalNode() |
|
412 |
lines = bytes.splitlines() |
|
413 |
items = {} |
|
414 |
if lines[0] != 'chknode:': |
|
415 |
raise ValueError("not a serialised internal node: %r" % bytes) |
|
416 |
maximum_size = int(lines[1]) |
|
417 |
width = int(lines[2]) |
|
418 |
length = int(lines[3]) |
|
419 |
for line in lines[4:]: |
|
420 |
prefix, flat_key = line.rsplit('\x00', 1) |
|
421 |
items[prefix] = (flat_key,) |
|
422 |
result._items = items |
|
423 |
result._len = length |
|
424 |
result._maximum_size = maximum_size |
|
425 |
result._key = key |
|
426 |
result._key_width = width |
|
427 |
result._size = len(bytes) |
|
428 |
result._node_width = len(prefix) |
|
429 |
return result |
|
430 |
||
431 |
def iteritems(self, store, key_filter=None): |
|
432 |
for node in self._iter_nodes(store, key_filter=key_filter): |
|
433 |
for item in node.iteritems(store, key_filter=key_filter): |
|
434 |
yield item |
|
435 |
||
436 |
def _iter_nodes(self, store, key_filter=None): |
|
437 |
"""Iterate over node objects which match key_filter. |
|
438 |
||
439 |
:param store: A store to use for accessing content.
|
|
440 |
:param key_filter: A key filter to filter nodes. Only nodes that might
|
|
441 |
contain a key in key_filter will be returned.
|
|
442 |
:return: An iterable of nodes.
|
|
443 |
"""
|
|
444 |
nodes = [] |
|
445 |
keys = set() |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
446 |
if key_filter is None: |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
447 |
for node in self._items.itervalues(): |
448 |
if type(node) == tuple: |
|
449 |
keys.add(node) |
|
450 |
else: |
|
451 |
nodes.append(node) |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
452 |
else: |
453 |
serialised_filter = set([self._serialised_key(key) for key in |
|
454 |
key_filter]) |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
455 |
for prefix, node in self._items.iteritems(): |
456 |
if prefix in serialised_filter: |
|
457 |
if type(node) == tuple: |
|
458 |
keys.add(node) |
|
459 |
else: |
|
460 |
nodes.append(node) |
|
461 |
if keys: |
|
462 |
# demand load some pages.
|
|
463 |
stream = store.get_record_stream(keys, 'unordered', True) |
|
464 |
for record in stream: |
|
465 |
node = _deserialise(record.get_bytes_as('fulltext'), record.key) |
|
466 |
nodes.append(node) |
|
467 |
return nodes |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
468 |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
469 |
def map(self, store, key, value): |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
470 |
"""Map key to value.""" |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
471 |
if not len(self._items): |
472 |
raise AssertionError("cant map in an empty InternalNode.") |
|
473 |
children = self._iter_nodes(store, key_filter=[key]) |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
474 |
serialised_key = self._serialised_key(key) |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
475 |
if children: |
476 |
child = children[0] |
|
477 |
else: |
|
478 |
# new child needed:
|
|
479 |
child = self._new_child(serialised_key, LeafNode) |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
480 |
old_len = len(child) |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
481 |
prefix, node_details = child.map(store, key, value) |
482 |
if len(node_details) == 1: |
|
483 |
# child may have shrunk, or might be the same.
|
|
484 |
self._len = self._len - old_len + len(child) |
|
485 |
self._items[serialised_key] = child |
|
|
3735.2.29
by Robert Collins
Untested code is broken code. |
486 |
self._key = None |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
487 |
return self.unique_serialised_prefix(), [("", self)] |
488 |
# child has overflown - create a new intermediate node.
|
|
489 |
# XXX: This is where we might want to try and expand our depth
|
|
490 |
# to refer to more bytes of every child (which would give us
|
|
491 |
# multiple pointers to child nodes, but less intermediate nodes)
|
|
492 |
child = self._new_child(serialised_key, InternalNode) |
|
493 |
for split, node in node_details: |
|
494 |
child.add_node(split, node) |
|
495 |
self._len = self._len - old_len + len(child) |
|
|
3735.2.29
by Robert Collins
Untested code is broken code. |
496 |
self._key = None |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
497 |
return self.unique_serialised_prefix(), [("", self)] |
498 |
||
499 |
def _new_child(self, serialised_key, klass): |
|
500 |
"""Create a new child node of type klass.""" |
|
501 |
child = klass() |
|
502 |
child.set_maximum_size(self._maximum_size) |
|
503 |
child._key_width = self._key_width |
|
504 |
self._items[serialised_key] = child |
|
505 |
return child |
|
506 |
||
507 |
def serialise(self, store): |
|
508 |
"""Serialise the node to store. |
|
509 |
||
510 |
:param store: A VersionedFiles honouring the CHK extensions.
|
|
511 |
:return: An iterable of the keys inserted by this operation.
|
|
512 |
"""
|
|
513 |
for node in self._items.itervalues(): |
|
514 |
if type(node) == tuple: |
|
515 |
# Never deserialised.
|
|
516 |
continue
|
|
517 |
if node._key is not None: |
|
518 |
# Never altered
|
|
519 |
continue
|
|
520 |
for key in node.serialise(store): |
|
521 |
yield key |
|
522 |
lines = ["chknode:\n"] |
|
523 |
lines.append("%d\n" % self._maximum_size) |
|
524 |
lines.append("%d\n" % self._key_width) |
|
525 |
lines.append("%d\n" % self._len) |
|
526 |
for prefix, node in sorted(self._items.items()): |
|
527 |
if type(node) == tuple: |
|
528 |
key = node[0] |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
529 |
else: |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
530 |
key = node._key[0] |
531 |
lines.append("%s\x00%s\n" % (prefix, key)) |
|
532 |
sha1, _, _ = store.add_lines((None,), (), lines) |
|
533 |
self._key = ("sha1:" + sha1,) |
|
534 |
yield self._key |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
535 |
|
536 |
def _serialised_key(self, key): |
|
537 |
"""Return the serialised key for key in this node.""" |
|
538 |
return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width] |
|
539 |
||
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
540 |
def _split(self, offset): |
541 |
"""Split this node into smaller nodes starting at offset. |
|
542 |
||
543 |
:param offset: The offset to start the new child nodes at.
|
|
544 |
:return: An iterable of (prefix, node) tuples. prefix is a byte
|
|
545 |
prefix for reaching node.
|
|
546 |
"""
|
|
547 |
if offset >= self._node_width: |
|
548 |
for node in self._items.values(): |
|
549 |
for result in node._split(offset): |
|
550 |
yield result |
|
551 |
return
|
|
552 |
for key, node in self._items.items(): |
|
553 |
pass
|
|
554 |
||
|
3735.2.26
by Robert Collins
CHKInventory migrated to new CHKMap code. |
555 |
def refs(self): |
556 |
"""Return the references to other CHK's held by this node.""" |
|
557 |
if self._key is None: |
|
558 |
raise AssertionError("unserialised nodes have no refs.") |
|
559 |
refs = [] |
|
560 |
for value in self._items.itervalues(): |
|
561 |
if type(value) == tuple: |
|
562 |
refs.append(value) |
|
563 |
else: |
|
564 |
refs.append(value.key()) |
|
565 |
return refs |
|
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
566 |
|
567 |
def unique_serialised_prefix(self): |
|
568 |
"""Return the unique key prefix for this node. |
|
569 |
||
570 |
:return: A bytestring of the longest serialised key prefix that is
|
|
571 |
unique within this node.
|
|
572 |
"""
|
|
573 |
# may want to cache this eventually :- but wait for enough
|
|
574 |
# functionality to profile.
|
|
575 |
keys = list(self._items.keys()) |
|
576 |
if not keys: |
|
577 |
return "" |
|
578 |
current_prefix = keys.pop(-1) |
|
579 |
while current_prefix and keys: |
|
580 |
next_key = keys.pop(-1) |
|
581 |
for pos, (left, right) in enumerate(zip(current_prefix, next_key)): |
|
582 |
if left != right: |
|
583 |
pos -= 1 |
|
584 |
break
|
|
585 |
current_prefix = current_prefix[:pos + 1] |
|
586 |
return current_prefix |
|
587 |
||
588 |
def unmap(self, store, key): |
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
589 |
"""Remove key from this node and it's children.""" |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
590 |
if not len(self._items): |
591 |
raise AssertionError("cant unmap in an empty InternalNode.") |
|
592 |
serialised_key = self._serialised_key(key) |
|
593 |
children = self._iter_nodes(store, key_filter=[key]) |
|
594 |
serialised_key = self._serialised_key(key) |
|
595 |
if children: |
|
596 |
child = children[0] |
|
597 |
else: |
|
598 |
raise KeyError(key) |
|
599 |
self._len -= 1 |
|
600 |
unmapped = child.unmap(store, key) |
|
601 |
if len(unmapped) == 0: |
|
602 |
# All child nodes are gone, remove the child:
|
|
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
603 |
del self._items[serialised_key] |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
604 |
else: |
605 |
# Stash the returned node
|
|
606 |
self._items[serialised_key] = unmapped |
|
|
3735.2.23
by Robert Collins
Test unmapping with one child left but multiple keys. |
607 |
if len(self._items) == 1: |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
608 |
# this node is no longer needed:
|
609 |
return self._items.values()[0] |
|
|
3735.2.29
by Robert Collins
Untested code is broken code. |
610 |
self._key = None |
|
3735.2.18
by Robert Collins
Partial multi-layer chk dictionary trees. |
611 |
return self |
612 |
||
613 |
||
|
3735.2.16
by Robert Collins
Untested extensions to support repodetails |
614 |
def _deserialise(bytes, key): |
615 |
"""Helper for repositorydetails - convert bytes to a node.""" |
|
|
3735.2.24
by Robert Collins
test_chk_map tests all passing. |
616 |
if bytes.startswith("chkleaf:\n"): |
|
3735.2.21
by Robert Collins
BROKEN: multi level CHKMap tries, unfinished, subsystem in flux. |
617 |
return LeafNode.deserialise(bytes, key) |
618 |
elif bytes.startswith("chknode:\n"): |
|
619 |
return InternalNode.deserialise(bytes, key) |
|
|
3735.2.16
by Robert Collins
Untested extensions to support repodetails |
620 |
else: |
621 |
raise AssertionError("Unknown node type.") |