from __future__ import absolute_import

from .. import lazy_import
lazy_import.lazy_import(globals(), """
from ..sixish import (
    viewitems,
    viewvalues,
    )
from ..sixish import PY3
from ..static_tuple import StaticTuple
# If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
# out, it takes 3.1MB to cache the layer.
_PAGE_CACHE_SIZE = 4 * 1024 * 1024
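# (Worked example of the estimate above, not in the original source:
# 50 bytes * 255 entries * 255 pages = 3,251,250 bytes, i.e. about 3.1MB,
# which fits within the 4MB page cache configured here.)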
# Per thread caches for 2 reasons:
# - in the server we may be serving very different content, so we get less
_INTERESTING_NEW_SIZE = 50
# If a ChildNode shrinks by more than this amount, we check for a remap
_INTERESTING_SHRINKAGE_LIMIT = 20
# If we delete more than this many nodes applying a delta, we check for a remap
_INTERESTING_DELETES_LIMIT = 5
def _search_key_plain(key):
    """Map the key tuple into a search string that just uses the key bytes."""
    return b'\x00'.join(key)


search_key_registry = registry.Registry()
search_key_registry.register(b'plain', _search_key_plain)
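# (A minimal illustration, not part of the original module: with the 'plain'
# strategy the search key is just the key elements joined by NUL bytes, e.g.
#   _search_key_plain((b'foo', b'bar')) == b'foo\x00bar'.)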
class CHKMap(object):

            into the map; if old_key is not None, then the old mapping
            of old_key is removed.
        """
        delete_count = 0
        # Check preconditions first.
        as_st = StaticTuple.from_sequence
        new_items = {as_st(key) for (old, key, value) in delta
                     if key is not None and old is None}
        existing_new = list(self.iteritems(key_filter=new_items))
        if existing_new:
            raise errors.InconsistentDeltaDelta(delta,
                "New items are already in the map %r." % existing_new)
        # Now apply changes.
        for old, new, value in delta:
            if old is not None and old != new:
                self.unmap(old, check_remap=False)
                delete_count += 1
        for old, new, value in delta:
            if new is not None:
                self.map(new, value)
        if delete_count > _INTERESTING_DELETES_LIMIT:
            trace.mutter("checking remap as %d deletions", delete_count)
            self._check_remap()
        return self._save()
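    # (Hedged usage sketch, not from the original source: a delta is an
    # iterable of (old_key, new_key, value) triples, e.g.
    #   chkmap.apply_delta([(None, (b'new',), b'value'),
    #                       ((b'stale',), None, None)])
    # adds one mapping, removes another, and returns the new root key.)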
    def _ensure_root(self):
        """Ensure that the root node is an object not a key."""
        if isinstance(self._root_node, StaticTuple):
            # Demand-load the root
            self._root_node = self._get_node(self._root_node)
        :param node: A tuple key or node object.
        :return: A node object.
        """
        if isinstance(node, StaticTuple):
            bytes = self._read_bytes(node)
            return _deserialise(bytes, node,
                                search_key_func=self._search_key_func)
            return _get_cache()[key]
        except KeyError:
            stream = self._store.get_record_stream([key], 'unordered', True)
            bytes = next(stream).get_bytes_as('fulltext')
            _get_cache()[key] = bytes
    def _dump_tree(self, include_keys=False, encoding='utf-8'):
        """Return the tree in a string representation."""
        self._ensure_root()
        if PY3:
            def decode(x): return x.decode(encoding)
        else:
            def decode(x): return x
        res = self._dump_tree_node(self._root_node, prefix=b'', indent='',
                                   decode=decode, include_keys=include_keys)
        res.append('')  # Give a trailing '\n'
        return '\n'.join(res)
    def _dump_tree_node(self, node, prefix, indent, decode, include_keys=True):
        """For this node and all children, generate a string representation."""
        result = []
        if not include_keys:
            key_str = ''
        else:
            node_key = node.key()
            if node_key is not None:
                key_str = ' %s' % (decode(node_key[0]),)
            else:
                key_str = ' None'
        result.append('%s%r %s%s' % (indent, decode(prefix), node.__class__.__name__,
                                     key_str))
        if isinstance(node, InternalNode):
            # Trigger all child nodes to get loaded
            list(node._iter_nodes(self._store))
            for prefix, sub in sorted(viewitems(node._items)):
                result.extend(self._dump_tree_node(sub, prefix, indent + '  ',
                                                   decode=decode,
                                                   include_keys=include_keys))
        else:
            for key, value in sorted(viewitems(node._items)):
                # Don't use prefix nor indent here to line up when used in
                # tests in conjunction with assertEqualDiff
                result.append('      %r %r' % (
                    tuple([decode(ke) for ke in key]), decode(value)))
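    # (Hedged example, not in the original: the representation built above for
    # a small two-leaf map looks roughly like
    #   '' InternalNode
    #     'a' LeafNode
    #       ('aaa',) 'value1'
    #     'b' LeafNode
    #       ('bbb',) 'value2'
    # which is what the tests compare with assertEqualDiff.)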
    @classmethod
    def from_dict(klass, store, initial_value, maximum_size=0, key_width=1,
                  search_key_func=None):
        """Create a CHKMap in store with initial_value as the content.

        :param store: The store to record initial_value in, a VersionedFiles
        :return: The root chk of the resulting CHKMap.
        """
        root_key = klass._create_directly(store, initial_value,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        if not isinstance(root_key, StaticTuple):
            raise AssertionError('we got a %s instead of a StaticTuple'
                                 % (type(root_key),))
        result._root_node.set_maximum_size(maximum_size)
        result._root_node._key_width = key_width
        delta = []
        for key, value in viewitems(initial_value):
            delta.append((None, key, value))
        root_key = result.apply_delta(delta)
        node.set_maximum_size(maximum_size)
        node._key_width = key_width
        as_st = StaticTuple.from_sequence
        node._items = dict((as_st(key), val)
                           for key, val in viewitems(initial_value))
        node._raw_size = sum(node._key_value_len(key, value)
                             for key, value in viewitems(node._items))
        node._len = len(node._items)
        node._compute_search_prefix()
        node._compute_serialised_prefix()
        if (node._len > 1 and
                maximum_size and
                node._current_size() > maximum_size):
            prefix, node_details = node._split(store)
            if len(node_details) == 1:
                raise AssertionError('Failed to split using node._split')
        # key_path (a list of tuples, tail-sharing down the tree.)
        self_pending = []
        basis_pending = []

        def process_node(node, path, a_map, pending):
            # take a node and expand it
            node = a_map._get_node(node)
            if isinstance(node, LeafNode):
                path = (node._key, path)
                for key, value in viewitems(node._items):
                    # For a LeafNode, the key is a serialized_key, rather than
                    # a search_key, but the heap is using search_keys
                    search_key = node._search_key_func(key)
                    heapq.heappush(pending, (search_key, key, value, path))
            else:
                # type(node) == InternalNode
                path = (node._key, path)
                for prefix, child in viewitems(node._items):
                    heapq.heappush(pending, (prefix, None, child, path))
        def process_common_internal_nodes(self_node, basis_node):
            self_items = set(viewitems(self_node._items))
            basis_items = set(viewitems(basis_node._items))
            path = (self_node._key, None)
            for prefix, child in self_items - basis_items:
                heapq.heappush(self_pending, (prefix, None, child, path))
            path = (basis_node._key, None)
            for prefix, child in basis_items - self_items:
                heapq.heappush(basis_pending, (prefix, None, child, path))
        def process_common_leaf_nodes(self_node, basis_node):
            self_items = set(viewitems(self_node._items))
            basis_items = set(viewitems(basis_node._items))
            path = (self_node._key, None)
            for key, value in self_items - basis_items:
                prefix = self._search_key_func(key)
                heapq.heappush(self_pending, (prefix, key, value, path))
            path = (basis_node._key, None)
            for key, value in basis_items - self_items:
                prefix = basis._search_key_func(key)
                heapq.heappush(basis_pending, (prefix, key, value, path))
        def process_common_prefix_nodes(self_node, self_path,
                                        basis_node, basis_path):
            # Would it be more efficient if we could request both at the same
            # time?
            self_node = self._get_node(self_node)
            basis_node = basis._get_node(basis_node)
            if (isinstance(self_node, InternalNode) and
                    isinstance(basis_node, InternalNode)):
                # Matching internal nodes
                process_common_internal_nodes(self_node, basis_node)
            elif (isinstance(self_node, LeafNode) and
                    isinstance(basis_node, LeafNode)):
                process_common_leaf_nodes(self_node, basis_node)
            else:
                process_node(self_node, self_path, self, self_pending)
        self_seen = set()
        basis_seen = set()
        excluded_keys = set()

        def check_excluded(key_path):
            # Note that this is N^2, it depends on us trimming trees
            # aggressively to not become slow.
            # A better implementation would probably have a reverse map
            # back to the children of a node, and jump straight to it when
            # a common node is detected, then proceed to remove the already
            # pending children. breezy.graph has a searcher module with a
            # similar problem.
            while key_path is not None:
                key, key_path = key_path
                        basis_details = heapq.heappop(basis_pending)
                        if self_details[2] != basis_details[2]:
                            yield (self_details[1],
                                   basis_details[2], self_details[2])
                    else:
                        # At least one side wasn't a simple value
                        if (self._node_key(self_pending[0][2])
                                == self._node_key(basis_pending[0][2])):
                            # Identical pointers, skip (and don't bother adding to
                            # excluded, it won't turn up again.
                            heapq.heappop(self_pending)
            self._root_node = node_details[0][1]
        else:
            self._root_node = InternalNode(prefix,
                                           search_key_func=self._search_key_func)
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
            self._root_node._key_width = node_details[0][1]._key_width
            for split, node in node_details:
                self._root_node.add_node(split, node)
    def _node_key(self, node):
        """Get the key for a node whether it's a tuple or node."""
        if isinstance(node, tuple):
            node = StaticTuple.from_sequence(node)
        if isinstance(node, StaticTuple):
            return node
        """remove key from the map."""
        key = StaticTuple.from_sequence(key)
        self._ensure_root()
        if isinstance(self._root_node, InternalNode):
            unmapped = self._root_node.unmap(self._store, key,
                                             check_remap=check_remap)
        else:
            unmapped = self._root_node.unmap(self._store, key)
        self._root_node = unmapped
    def _check_remap(self):
        """Check if nodes can be collapsed."""
        self._ensure_root()
        if isinstance(self._root_node, InternalNode):
            self._root_node = self._root_node._check_remap(self._store)
        """Save the map completely.

        :return: The key of the root node.
        """
        if isinstance(self._root_node, StaticTuple):
            # Already saved.
            return self._root_node
        keys = list(self._root_node.serialise(self._store))
    __slots__ = ('_key', '_len', '_maximum_size', '_key_width',
                 '_raw_size', '_items', '_search_prefix', '_search_key_func'
                 )

    def __init__(self, key_width=1):
        """Create a node.
        return \
            '%s(key:%s len:%s size:%s max:%s prefix:%s keywidth:%s items:%s)' \
            % (self.__class__.__name__, self._key, self._len, self._raw_size,
               self._maximum_size, self._search_prefix, self._key_width, items_str)
    def _current_size(self):
        """Answer the current serialised size of this node.
        """
        prefix_len = len(self._common_serialised_prefix)
        bytes_for_items = (self._raw_size - (prefix_len * self._len))
        return (9 +  # 'chkleaf:\n' +
                len(str(self._maximum_size)) + 1 +
                len(str(self._key_width)) + 1 +
                len(str(self._len)) + 1 +
                prefix_len + 1 +
                bytes_for_items)
    @classmethod
    def deserialise(klass, bytes, key, search_key_func=None):
                    # Short items, we need to match based on a prefix
                    filters.setdefault(len(key), set()).add(key)
            if filters:
                filters_itemview = viewitems(filters)
                for item in viewitems(self._items):
                    for length, length_filter in filters_itemview:
                        if item[0][:length] in length_filter:
                            yield item
                            break
        else:
            for item in viewitems(self._items):
                yield item
def _key_value_len(self, key, value):
793
784
# TODO: Should probably be done without actually joining the key, but
794
785
# then that can be done via the C extension
795
return (len(self._serialise_key(key)) + 1 +
796
len(b'%d' % value.count(b'\n')) + 1 +
786
return (len(self._serialise_key(key)) + 1
787
+ len(str(value.count('\n'))) + 1
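    # (Illustration, not in the original source: for key (b'foo',) and value
    # b'a\nb', value.count(b'\n') is 1, so the estimated size is
    # len(b'foo') + 1 + len(b'1') + 1 + len(b'a\nb') + 1 = 10 bytes.)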
    def _search_key(self, key):
        return self._search_key_func(key)
        self._search_prefix = self.common_prefix(
            self._search_prefix, search_key)
        if (self._len > 1 and
                self._maximum_size and
                self._current_size() > self._maximum_size):
            # Check to see if all of the search_keys for this node are
            # identical. We allow the node to grow under that circumstance
            # (we could track this as common state, but it is infrequent)
            if (search_key != self._search_prefix or
                    not self._are_search_keys_identical()):
        common_prefix = self._search_prefix
        split_at = len(common_prefix) + 1
        result = {}
        for key, value in viewitems(self._items):
            search_key = self._search_key(key)
            prefix = search_key[:split_at]
            # TODO: Generally only 1 key can be exactly the right length,
            # may get a '\00' node anywhere, but won't have keys of
            # different lengths.
            if len(prefix) < split_at:
                prefix += b'\x00' * (split_at - len(prefix))
            if prefix not in result:
                node = LeafNode(search_key_func=self._search_key_func)
                node.set_maximum_size(self._maximum_size)
                    result.pop(prefix)
                new_node = InternalNode(sub_prefix,
                                        search_key_func=self._search_key_func)
                new_node.set_maximum_size(self._maximum_size)
                new_node._key_width = self._key_width
                for split, node in node_details:
                    new_node.add_node(split, node)
                result[prefix] = new_node
        return common_prefix, list(viewitems(result))
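    # (Hedged illustration: with common_prefix b'ab', split_at is 3, so items
    # are regrouped into children keyed by their 3-byte search-key prefix,
    # e.g. all keys whose search key starts with b'aba' share one child.)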
    def map(self, store, key, value):
        """Map key to value."""
        if key in self._items:
            self._raw_size -= self._key_value_len(key, self._items[key])
            self._len -= 1
        self._key = None
        if self._map_no_split(key, value):
            return self._split(store)
        else:
            if self._search_prefix is _unknown:
                raise AssertionError('%r must be known' % self._search_prefix)
            return self._search_prefix, [(b"", self)]

    _serialise_key = b'\x00'.join
    def serialise(self, store):
        """Serialise the LeafNode to store.

        :param store: A VersionedFiles honouring the CHK extensions.
        :return: An iterable of the keys inserted by this operation.
        """
        lines = [b"chkleaf:\n"]
        lines.append(b"%d\n" % self._maximum_size)
        lines.append(b"%d\n" % self._key_width)
        lines.append(b"%d\n" % self._len)
        if self._common_serialised_prefix is None:
            lines.append(b'\n')
            if len(self._items) != 0:
                raise AssertionError('If _common_serialised_prefix is None'
                                     ' we should have no items')
        else:
            lines.append(b'%s\n' % (self._common_serialised_prefix,))
            prefix_len = len(self._common_serialised_prefix)
        for key, value in sorted(viewitems(self._items)):
            # Always add a final newline
            value_lines = osutils.chunks_to_lines([value + b'\n'])
            serialized = b"%s\x00%d\n" % (self._serialise_key(key),
                                          len(value_lines))
            if not serialized.startswith(self._common_serialised_prefix):
                raise AssertionError('We thought the common prefix was %r'
                                     ' but entry %r does not have it in common'
                                     % (self._common_serialised_prefix, serialized))
            lines.append(serialized[prefix_len:])
            lines.extend(value_lines)
        sha1, _, _ = store.add_lines((None,), (), lines)
        self._key = StaticTuple(b"sha1:" + sha1,).intern()
        data = b''.join(lines)
        if len(data) != self._current_size():
            raise AssertionError('Invalid _current_size')
        _get_cache()[self._key] = data
        return [self._key]
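    # (Hedged sketch of the layout written above: b"chkleaf:\n", then
    # maximum_size, key_width and item count each on their own line, then the
    # common serialised prefix, then one "<key>\x00<num lines>\n<value lines>"
    # entry per item with the common prefix stripped from each entry.)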
    __slots__ = ('_node_width',)

    def __init__(self, prefix=b'', search_key_func=None):
        Node.__init__(self)
        # The size of an internalnode with default values and no children.
        # How many octets key prefixes within this node are.
        if self._search_prefix is None:
            raise AssertionError("_search_prefix should not be None")
        if not prefix.startswith(self._search_prefix):
            raise AssertionError("prefixes mismatch: %s must start with %s"
                                 % (prefix, self._search_prefix))
        if len(prefix) != len(self._search_prefix) + 1:
            raise AssertionError("prefix wrong length: len(%s) is not %d" %
                                 (prefix, len(self._search_prefix) + 1))
        self._len += len(node)
        if not len(self._items):
            self._node_width = len(prefix)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                                 (self._node_width, len(self._search_prefix) + 1))
        self._items[prefix] = node
        self._key = None
    def _current_size(self):
        """Answer the current serialised size of this node."""
        return (self._raw_size + len(str(self._len)) + len(str(self._key_width))
                + len(str(self._maximum_size)))

    @classmethod
    def deserialise(klass, bytes, key, search_key_func=None):
            # yielding all nodes, yield whatever we have, and queue up a read
            # for whatever we are missing
            shortcut = True
            for prefix, node in viewitems(self._items):
                if node.__class__ is StaticTuple:
                    keys[node] = (prefix, None)
            for key in key_filter:
                search_prefix = self._search_prefix_filter(key)
                length_filter = length_filters.setdefault(
                    len(search_prefix), set())
                length_filter.add(search_prefix)
                prefix_to_keys.setdefault(search_prefix, []).append(key)
            if (self._node_width in length_filters and
                    len(length_filters) == 1):
                # all of the search prefixes match exactly _node_width. This
                # means that everything is an exact match, and we can do a
                # lookup into self._items, rather than iterating over the items
                # The slow way. We walk every item in self._items, and check to
                # see if there are any matches
                length_filters_itemview = viewitems(length_filters)
                for prefix, node in viewitems(self._items):
                    node_key_filter = []
                    for length, length_filter in length_filters_itemview:
                        sub_prefix = prefix[:length]
                        if sub_prefix in length_filter:
                            node_key_filter.extend(prefix_to_keys[sub_prefix])
                    if node_key_filter:  # this key matched something, yield it
                        if node.__class__ is StaticTuple:
                            keys[node] = (prefix, node_key_filter)
                node = _deserialise(bytes, key,
                                    search_key_func=self._search_key_func)
                prefix, node_key_filter = keys[key]
                self._items[prefix] = node
                found_keys.add(key)
            for record in stream:
                bytes = record.get_bytes_as('fulltext')
                node = _deserialise(bytes, record.key,
                                    search_key_func=self._search_key_func)
                prefix, node_key_filter = keys[record.key]
                node_and_filters.append((node, node_key_filter))
                self._items[prefix] = node
                _get_cache()[record.key] = bytes
            for info in node_and_filters:
        search_key = self._search_key(key)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                                 (self._node_width, len(self._search_prefix) + 1))
        if not search_key.startswith(self._search_prefix):
            # This key doesn't fit in this index, so we need to split at the
            # point where it would fit, insert self into that internal node,
            # and then map this key into that node.
            new_prefix = self.common_prefix(self._search_prefix,
                                            search_key)
            new_parent = InternalNode(new_prefix,
                                      search_key_func=self._search_key_func)
            new_parent.set_maximum_size(self._maximum_size)
            new_parent._key_width = self._key_width
            new_parent.add_node(self._search_prefix[:len(new_prefix) + 1],
                                self)
            return new_parent.map(store, key, value)
        children = [node for node, _ in self._iter_nodes(
            store, key_filter=[key])]
        if children:
            child = children[0]
        else:
            # new child needed:
            child = self._new_child(search_key, LeafNode)
        old_len = len(child)
        if isinstance(child, LeafNode):
            old_size = child._current_size()
        else:
            old_size = None
            # amount is over a configurable limit.
            new_size = child._current_size()
            shrinkage = old_size - new_size
            if (shrinkage > 0 and new_size < _INTERESTING_NEW_SIZE or
                    shrinkage > _INTERESTING_SHRINKAGE_LIMIT):
                trace.mutter(
                    "checking remap as size shrunk by %d to be %d",
                    shrinkage, new_size)
                new_node = self._check_remap(store)
                if new_node._search_prefix is None:
                    raise AssertionError("_search_prefix should not be None")
                return new_node._search_prefix, [(b'', new_node)]
            # child has overflown - create a new intermediate node.
            # XXX: This is where we might want to try and expand our depth
            # to refer to more bytes of every child (which would give us
                child.add_node(split, node)
            self._len = self._len - old_len + len(child)
            self._key = None
            return self._search_prefix, [(b"", self)]
    def _new_child(self, search_key, klass):
        """Create a new child node of type klass."""
        :param store: A VersionedFiles honouring the CHK extensions.
        :return: An iterable of the keys inserted by this operation.
        """
        for node in viewvalues(self._items):
            if isinstance(node, StaticTuple):
                # Never deserialised.
                continue
            if node._key is not None:
                continue
            for key in node.serialise(store):
                yield key
        lines = [b"chknode:\n"]
        lines.append(b"%d\n" % self._maximum_size)
        lines.append(b"%d\n" % self._key_width)
        lines.append(b"%d\n" % self._len)
        if self._search_prefix is None:
            raise AssertionError("_search_prefix should not be None")
        lines.append(b'%s\n' % (self._search_prefix,))
        prefix_len = len(self._search_prefix)
        for prefix, node in sorted(viewitems(self._items)):
            if isinstance(node, StaticTuple):
                key = node[0]
            else:
                key = node._key[0]
            serialised = b"%s\x00%s\n" % (prefix, key)
            if not serialised.startswith(self._search_prefix):
                raise AssertionError("prefixes mismatch: %s must start with %s"
                                     % (serialised, self._search_prefix))
            lines.append(serialised[prefix_len:])
        sha1, _, _ = store.add_lines((None,), (), lines)
        self._key = StaticTuple(b"sha1:" + sha1,).intern()
        _get_cache()[self._key] = b''.join(lines)
        yield self._key
1328
def _search_key(self, key):
1338
1329
"""Return the serialised key for key in this node."""
1339
1330
# search keys are fixed width. All will be self._node_width wide, so we
1340
1331
# pad as necessary.
1341
return (self._search_key_func(key) + b'\x00' * self._node_width)[:self._node_width]
1332
return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width]
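    # (Illustration, not in the original source: with _node_width == 4 a
    # search key of b'ab' pads to b'ab\x00\x00', while b'abcdef' truncates
    # to b'abcd'.)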
    def _search_prefix_filter(self, key):
        """Serialise key for use as a prefix filter in iteritems."""

            prefix for reaching node.
        """
        if offset >= self._node_width:
            for node in viewvalues(self._items):
                for result in node._split(offset):
                    yield result
        for key, node in viewitems(self._items):
    def refs(self):
        """Return the references to other CHK's held by this node."""
        if self._key is None:
            raise AssertionError("unserialised nodes have no refs.")
        refs = []
        for value in viewvalues(self._items):
            if isinstance(value, StaticTuple):
                refs.append(value)
            else:
                refs.append(value.key())
        return refs
1372
return self._search_prefix
1380
1374
def unmap(self, store, key, check_remap=True):
1381
"""Remove key from this node and its children."""
1375
"""Remove key from this node and it's children."""
1382
1376
if not len(self._items):
1383
1377
raise AssertionError("can't unmap in an empty InternalNode.")
1384
1378
children = [node for node, _
1385
in self._iter_nodes(store, key_filter=[key])]
1379
in self._iter_nodes(store, key_filter=[key])]
1387
1381
child = children[0]
1400
1394
self._items[search_key] = unmapped
1401
1395
if len(self._items) == 1:
1402
1396
# this node is no longer needed:
1403
return list(viewvalues(self._items))[0]
1404
if isinstance(unmapped, InternalNode):
1397
return self._items.values()[0]
1398
if type(unmapped) is InternalNode:
1406
1400
if check_remap:
1407
1401
return self._check_remap(store)
1447
1441
# c) With 255-way fan out, we don't want to read all 255 and destroy
1448
1442
# the page cache, just to determine that we really don't need it.
1449
1443
for node, _ in self._iter_nodes(store, batch_size=16):
1450
if isinstance(node, InternalNode):
1444
if type(node) is InternalNode:
1451
1445
# Without looking at any leaf nodes, we are sure
1453
for key, value in viewitems(node._items):
1447
for key, value in node._items.iteritems():
1454
1448
if new_leaf._map_no_split(key, value):
1456
1450
trace.mutter("remap generated a new LeafNode")
1457
1451
return new_leaf
1460
def _deserialise(data, key, search_key_func):
1454
def _deserialise(bytes, key, search_key_func):
1461
1455
"""Helper for repositorydetails - convert bytes to a node."""
1462
if data.startswith(b"chkleaf:\n"):
1463
node = LeafNode.deserialise(data, key, search_key_func=search_key_func)
1464
elif data.startswith(b"chknode:\n"):
1465
node = InternalNode.deserialise(data, key,
1466
search_key_func=search_key_func)
1456
if bytes.startswith("chkleaf:\n"):
1457
node = LeafNode.deserialise(bytes, key, search_key_func=search_key_func)
1458
elif bytes.startswith("chknode:\n"):
1459
node = InternalNode.deserialise(bytes, key,
1460
search_key_func=search_key_func)
1468
1462
raise AssertionError("Unknown node type.")
1532
1526
bytes = record.get_bytes_as('fulltext')
1533
1527
node = _deserialise(bytes, record.key,
1534
1528
search_key_func=self._search_key_func)
1535
if isinstance(node, InternalNode):
1529
if type(node) is InternalNode:
1536
1530
# Note we don't have to do node.refs() because we know that
1537
1531
# there are no children that have been pushed into this node
1538
1532
# Note: Using as_st() here seemed to save 1.2MB, which would
1539
1533
# indicate that we keep 100k prefix_refs around while
1540
1534
# processing. They *should* be shorter lived than that...
1541
1535
# It does cost us ~10s of processing time
1542
prefix_refs = list(viewitems(node._items))
1536
#prefix_refs = [as_st(item) for item in node._items.iteritems()]
1537
prefix_refs = node._items.items()
1545
1540
prefix_refs = []
1546
1541
# Note: We don't use a StaticTuple here. Profiling showed a
1547
1542
# minor memory improvement (0.8MB out of 335MB peak 0.2%)
1548
1543
# But a significant slowdown (15s / 145s, or 10%)
1549
items = list(viewitems(node._items))
1544
items = node._items.items()
1550
1545
yield record, node, prefix_refs, items
1552
1547
def _read_old_roots(self):
1556
1551
self._read_nodes_from_store(self._old_root_keys):
1557
1552
# Uninteresting node
1558
1553
prefix_refs = [p_r for p_r in prefix_refs
1559
if p_r[1] not in all_old_chks]
1554
if p_r[1] not in all_old_chks]
1560
1555
new_refs = [p_r[1] for p_r in prefix_refs]
1561
1556
all_old_chks.update(new_refs)
1562
1557
# TODO: This might be a good time to turn items into StaticTuple
1576
1571
# handled the interesting ones
1577
1572
for prefix, ref in old_chks_to_enqueue:
1578
1573
not_interesting = True
1579
for i in range(len(prefix), 0, -1):
1574
for i in xrange(len(prefix), 0, -1):
1580
1575
if prefix[:i] in new_prefixes:
1581
1576
not_interesting = False
1612
1607
# At this level, we now know all the uninteresting references
1613
1608
# So we filter and queue up whatever is remaining
1614
1609
prefix_refs = [p_r for p_r in prefix_refs
1615
if p_r[1] not in self._all_old_chks and
1616
p_r[1] not in processed_new_refs]
1610
if p_r[1] not in self._all_old_chks
1611
and p_r[1] not in processed_new_refs]
1617
1612
refs = [p_r[1] for p_r in prefix_refs]
1618
1613
new_prefixes.update([p_r[0] for p_r in prefix_refs])
1619
1614
self._new_queue.extend(refs)
1625
1620
# self._new_item_queue will hold the contents of multiple
1626
1621
# records for an extended lifetime
1627
1622
new_items = [item for item in items
1628
if item not in self._all_old_items]
1623
if item not in self._all_old_items]
1629
1624
self._new_item_queue.extend(new_items)
1630
1625
new_prefixes.update([self._search_key_func(item[0])
1631
1626
for item in new_items])
1636
1631
# 'ab', then we also need to include 'a'.) So expand the
1637
1632
# new_prefixes to include all shorter prefixes
1638
1633
for prefix in list(new_prefixes):
1639
new_prefixes.update([prefix[:i] for i in range(1, len(prefix))])
1634
new_prefixes.update([prefix[:i] for i in xrange(1, len(prefix))])
1640
1635
self._enqueue_old(new_prefixes, old_chks_to_enqueue)
1642
1637
def _flush_new_queue(self):
1693
1688
for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
1694
1689
# TODO: Use StaticTuple here?
1695
1690
self._all_old_items.update(items)
1696
refs = [r for _, r in prefix_refs if r not in all_old_chks]
1691
refs = [r for _,r in prefix_refs if r not in all_old_chks]
1697
1692
self._old_queue.extend(refs)
1698
1693
all_old_chks.update(refs)
1734
from ._chk_map_pyx import (
1729
from bzrlib._chk_map_pyx import (
1735
1730
_bytes_to_text_key,
1736
1731
_search_key_16,
1737
1732
_search_key_255,
1738
1733
_deserialise_leaf_node,
1739
1734
_deserialise_internal_node,
1741
except ImportError as e:
1736
except ImportError, e:
1742
1737
osutils.failed_to_load_extension(e)
1743
from ._chk_map_py import (
1738
from bzrlib._chk_map_py import (
1744
1739
_bytes_to_text_key,
1745
1740
_search_key_16,
1746
1741
_search_key_255,
1747
1742
_deserialise_leaf_node,
1748
1743
_deserialise_internal_node,
1750
search_key_registry.register(b'hash-16-way', _search_key_16)
1751
search_key_registry.register(b'hash-255-way', _search_key_255)
1745
search_key_registry.register('hash-16-way', _search_key_16)
1746
search_key_registry.register('hash-255-way', _search_key_255)
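# (Usage sketch, hedged: strategies registered above are looked up by name,
# e.g. search_key_registry.get(b'hash-16-way') returns _search_key_16, which
# maps a key tuple to a fixed-width hashed search key.)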
def _check_key(key):
    """Sanity check a key.

    This generally shouldn't be used in production code, but it can be helpful
    to debug problems.
    """
    if not isinstance(key, StaticTuple):
        raise TypeError('key %r is not StaticTuple but %s' % (key, type(key)))
    if len(key) != 1:
        raise ValueError('key %r should have length 1, not %d' %
                         (key, len(key),))
    if not isinstance(key[0], str):
        raise TypeError('key %r should hold a str, not %r'
                        % (key, type(key[0])))
    if not key[0].startswith('sha1:'):
        raise ValueError('key %r should point to a sha1:' % (key,))