from .. import lazy_import
lazy_import.lazy_import(globals(), """
from ..static_tuple import StaticTuple
# If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
# out, it takes 3.1MB to cache the layer.
_PAGE_CACHE_SIZE = 4 * 1024 * 1024
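# (Sketch of the arithmetic behind the comment above: 255 pages * 255-way fan
# out * 50 bytes per line = 3,251,250 bytes, roughly 3.1MB, so a 4MB cache
# holds one full layer with room to spare.)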
# Per thread caches for 2 reasons:
# - in the server we may be serving very different content, so we get less
_INTERESTING_NEW_SIZE = 50
# If a ChildNode shrinks by more than this amount, we check for a remap
_INTERESTING_SHRINKAGE_LIMIT = 20
# If we delete more than this many nodes applying a delta, we check for a remap
_INTERESTING_DELETES_LIMIT = 5
def _search_key_plain(key):
    """Map the key tuple into a search string that just uses the key bytes."""
    return b'\x00'.join(key)
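# For example, _search_key_plain((b'foo', b'bar')) returns b'foo\x00bar'.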
search_key_registry = registry.Registry()
search_key_registry.register(b'plain', _search_key_plain)
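# Callers can then look a search-key function up by its registered name
# (illustrative usage):
#   search_key_func = search_key_registry.get(b'plain')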
class CHKMap(object):

    def apply_delta(self, delta):
        """Apply a delta to the map.

        :param delta: An iterable of old_key, new_key, new_value tuples.
            If new_key is not None, then new_key->new_value is inserted
            into the map; if old_key is not None, then the old mapping
            of old_key is removed.
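
        For example (illustrative), the delta
        [(None, (b'new',), b'value'), ((b'old',), None, None)]
        inserts (b'new',) -> b'value' and removes the mapping for (b'old',).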
        """
        delete_count = 0
        # Check preconditions first.
        as_st = StaticTuple.from_sequence
        new_items = {as_st(key) for (old, key, value) in delta
                     if key is not None and old is None}
        existing_new = list(self.iteritems(key_filter=new_items))
        if existing_new:
            raise errors.InconsistentDeltaDelta(delta,
                "New items are already in the map %r." % existing_new)
        # Now apply changes.
        for old, new, value in delta:
            if old is not None and old != new:
                self.unmap(old, check_remap=False)
                delete_count += 1
        for old, new, value in delta:
            if new is not None:
                self.map(new, value)
        if delete_count > _INTERESTING_DELETES_LIMIT:
            trace.mutter("checking remap as %d deletions", delete_count)
            self._check_remap()
        return self._save()
    def _ensure_root(self):
        """Ensure that the root node is an object not a key."""
        if isinstance(self._root_node, StaticTuple):
            # Demand-load the root
            self._root_node = self._get_node(self._root_node)

    def _get_node(self, node):
        """Get a node.

        :param node: A tuple key or node object.
        :return: A node object.
        """
        if isinstance(node, StaticTuple):
            bytes = self._read_bytes(node)
            return _deserialise(bytes, node,
                                search_key_func=self._search_key_func)
        else:
            return node

    def _read_bytes(self, key):
        try:
            return _get_cache()[key]
        except KeyError:
            stream = self._store.get_record_stream([key], 'unordered', True)
            bytes = next(stream).get_bytes_as('fulltext')
            _get_cache()[key] = bytes
            return bytes

    def _dump_tree(self, include_keys=False, encoding='utf-8'):
        """Return the tree in a string representation."""
        self._ensure_root()

        def decode(x): return x.decode(encoding)
        res = self._dump_tree_node(self._root_node, prefix=b'', indent='',
                                   decode=decode, include_keys=include_keys)
        res.append('')  # Give a trailing '\n'
        return '\n'.join(res)
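    # Each line produced by _dump_tree_node below has the form
    # '<indent><prefix!r> <NodeClassName>[ <key>]'; leaf items are then
    # rendered with a fixed six-space indent so tests can compare the
    # output directly with assertEqualDiff.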
    def _dump_tree_node(self, node, prefix, indent, decode, include_keys=True):
        """For this node and all children, generate a string representation."""
        result = []
        if not include_keys:
            key_str = ''
        else:
            node_key = node.key()
            if node_key is not None:
                key_str = ' %s' % (decode(node_key[0]),)
            else:
                key_str = ' None'
        result.append('%s%r %s%s' % (indent, decode(prefix), node.__class__.__name__,
                                     key_str))
        if isinstance(node, InternalNode):
            # Trigger all child nodes to get loaded
            list(node._iter_nodes(self._store))
            for prefix, sub in sorted(node._items.items()):
                result.extend(self._dump_tree_node(sub, prefix, indent + ' ',
                                                   decode=decode, include_keys=include_keys))
        else:
            for key, value in sorted(node._items.items()):
                # Don't use prefix nor indent here to line up when used in
                # tests in conjunction with assertEqualDiff
                result.append('      %r %r' % (
                    tuple([decode(ke) for ke in key]), decode(value)))
        return result

    @classmethod
    def from_dict(klass, store, initial_value, maximum_size=0, key_width=1,
                  search_key_func=None):
        """Create a CHKMap in store with initial_value as the content.

        :param store: The store to record initial_value in, a VersionedFiles
        :return: The root chk of the resulting CHKMap.
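
        For example (illustrative)::

            root_key = CHKMap.from_dict(store, {(b'name',): b'contents'})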
        """
        root_key = klass._create_directly(store, initial_value,
                                          maximum_size=maximum_size, key_width=key_width,
                                          search_key_func=search_key_func)
        if not isinstance(root_key, StaticTuple):
            raise AssertionError('we got a %s instead of a StaticTuple'
                                 % (type(root_key),))
        return root_key

        node.set_maximum_size(maximum_size)
        node._key_width = key_width
        as_st = StaticTuple.from_sequence
        node._items = dict((as_st(key), val)
                           for key, val in initial_value.items())
        node._raw_size = sum(node._key_value_len(key, value)
                             for key, value in node._items.items())
        node._len = len(node._items)
        node._compute_search_prefix()
        node._compute_serialised_prefix()
        if (node._len > 1 and
                maximum_size and
                node._current_size() > maximum_size):
            prefix, node_details = node._split(store)
            if len(node_details) == 1:
                raise AssertionError('Failed to split using node._split')

        # key_path (a list of tuples, tail-sharing down the tree.)
        self_pending = []
        basis_pending = []

        def process_node(node, path, a_map, pending):
            # take a node and expand it
            node = a_map._get_node(node)
            if isinstance(node, LeafNode):
                path = (node._key, path)
                for key, value in node._items.items():
                    # For a LeafNode, the key is a serialized_key, rather than
                    # a fully expanded tuple.
                    heapq.heappush(pending, (self._search_key_func(key), key,
                                             value, path))
            else:
                path = (node._key, path)
                for prefix, child in node._items.items():
                    heapq.heappush(pending, (prefix, None, child, path))
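        # (Each pending entry is a (prefix, key, value-or-child, path) tuple;
        # because heapq orders tuples lexicographically, heappop always
        # returns the entry with the smallest prefix first.)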
        def process_common_internal_nodes(self_node, basis_node):
            self_items = set(self_node._items.items())
            basis_items = set(basis_node._items.items())
            path = (self_node._key, None)
            for prefix, child in self_items - basis_items:
                heapq.heappush(self_pending, (prefix, None, child, path))
            path = (basis_node._key, None)
            for prefix, child in basis_items - self_items:
                heapq.heappush(basis_pending, (prefix, None, child, path))

        def process_common_leaf_nodes(self_node, basis_node):
            self_items = set(self_node._items.items())
            basis_items = set(basis_node._items.items())
            path = (self_node._key, None)
            for key, value in self_items - basis_items:
                prefix = self._search_key_func(key)
                heapq.heappush(self_pending, (prefix, key, value, path))
            path = (basis_node._key, None)
            for key, value in basis_items - self_items:
                prefix = basis._search_key_func(key)
                heapq.heappush(basis_pending, (prefix, key, value, path))
        def process_common_prefix_nodes(self_node, self_path,
                                        basis_node, basis_path):
            # Would it be more efficient if we could request both at the same
            # time?
            self_node = self._get_node(self_node)
            basis_node = basis._get_node(basis_node)
            if (isinstance(self_node, InternalNode) and
                    isinstance(basis_node, InternalNode)):
                # Matching internal nodes
                process_common_internal_nodes(self_node, basis_node)
            elif (isinstance(self_node, LeafNode) and
                    isinstance(basis_node, LeafNode)):
                process_common_leaf_nodes(self_node, basis_node)
            else:
                process_node(self_node, self_path, self, self_pending)
                process_node(basis_node, basis_path, basis, basis_pending)

        self_seen = set()
        basis_seen = set()
        excluded_keys = set()
        def check_excluded(key_path):
            # Note that this is N^2, it depends on us trimming trees
            # aggressively to not become slow.
            # A better implementation would probably have a reverse map
            # back to the children of a node, and jump straight to it when
            # a common node is detected, then proceed to remove the already
            # pending children. breezy.graph has a searcher module with a
            # similar problem.
            while key_path is not None:
                key, key_path = key_path
                        basis_details = heapq.heappop(basis_pending)
                        if self_details[2] != basis_details[2]:
                            yield (self_details[1],
                                   basis_details[2], self_details[2])
                        continue
                    # At least one side wasn't a simple value
                    if (self._node_key(self_pending[0][2])
                            == self._node_key(basis_pending[0][2])):
                        # Identical pointers, skip (and don't bother adding to
                        # excluded, it won't turn up again.)
                        heapq.heappop(self_pending)
            self._root_node = node_details[0][1]
        else:
            self._root_node = InternalNode(prefix,
                                           search_key_func=self._search_key_func)
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
            self._root_node._key_width = node_details[0][1]._key_width
            for split, node in node_details:
                self._root_node.add_node(split, node)

    def _node_key(self, node):
        """Get the key for a node whether it's a tuple or node."""
        if isinstance(node, tuple):
            node = StaticTuple.from_sequence(node)
        if isinstance(node, StaticTuple):
            return node
        else:
            return node._key
    def unmap(self, key, check_remap=True):
        """Remove key from the map."""
        key = StaticTuple.from_sequence(key)
        self._ensure_root()
        if isinstance(self._root_node, InternalNode):
            unmapped = self._root_node.unmap(self._store, key,
                                             check_remap=check_remap)
        else:
            unmapped = self._root_node.unmap(self._store, key)
        self._root_node = unmapped

    def _check_remap(self):
        """Check if nodes can be collapsed."""
        self._ensure_root()
        if isinstance(self._root_node, InternalNode):
            self._root_node = self._root_node._check_remap(self._store)

    def _save(self):
        """Save the map completely.

        :return: The key of the root node.
        """
        if isinstance(self._root_node, StaticTuple):
            # Already saved.
            return self._root_node
        keys = list(self._root_node.serialise(self._store))
        return keys[-1]
class Node(object):

    __slots__ = ('_key', '_len', '_maximum_size', '_key_width',
                 '_raw_size', '_items', '_search_prefix', '_search_key_func'
                 )

    def __init__(self, key_width=1):
        """Create a node.
        return \
            '%s(key:%s len:%s size:%s max:%s prefix:%s keywidth:%s items:%s)' \
            % (self.__class__.__name__, self._key, self._len, self._raw_size,
               self._maximum_size, self._search_prefix, self._key_width, items_str)
    def _current_size(self):
        """Answer the current serialised size of this node.
        """
        prefix_len = len(self._common_serialised_prefix)
        bytes_for_items = (self._raw_size - (prefix_len * self._len))
        return (9 +  # 'chkleaf:\n' +
                len(str(self._maximum_size)) + 1 +
                len(str(self._key_width)) + 1 +
                len(str(self._len)) + 1 +
                prefix_len + 1 +
                bytes_for_items)
    def deserialise(klass, bytes, key, search_key_func=None):
                else:
                    # Short items, we need to match based on a prefix
                    filters.setdefault(len(key), set()).add(key)
            if filters:
                filters_itemview = filters.items()
                for item in self._items.items():
                    for length, length_filter in filters_itemview:
                        if item[0][:length] in length_filter:
                            yield item
                            break
        else:
            yield from self._items.items()
    def _key_value_len(self, key, value):
        # TODO: Should probably be done without actually joining the key, but
        # then that can be done via the C extension
        return (len(self._serialise_key(key)) + 1 +
                len(b'%d' % value.count(b'\n')) + 1 +
                len(value) + 1)

    def _search_key(self, key):
        return self._search_key_func(key)
            self._search_prefix = self.common_prefix(
                self._search_prefix, search_key)
        if (self._len > 1 and
                self._maximum_size and
                self._current_size() > self._maximum_size):
            # Check to see if all of the search_keys for this node are
            # identical. We allow the node to grow under that circumstance
            # (we could track this as common state, but it is infrequent)
            if (search_key != self._search_prefix or
                    not self._are_search_keys_identical()):
                return True
        return False

        common_prefix = self._search_prefix
        split_at = len(common_prefix) + 1
        result = {}
        for key, value in self._items.items():
            search_key = self._search_key(key)
            prefix = search_key[:split_at]
            # TODO: Generally only 1 key can be exactly the right length,
            # may get a '\00' node anywhere, but won't have keys of
            # different lengths.
            if len(prefix) < split_at:
                prefix += b'\x00' * (split_at - len(prefix))
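            # (Illustrative: with split_at == 5, a short prefix b'abc' is
            # padded to b'abc\x00\x00', so every child prefix has the same
            # width.)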
            if prefix not in result:
                node = LeafNode(search_key_func=self._search_key_func)
                node.set_maximum_size(self._maximum_size)
                node._key_width = self._key_width
                result[prefix] = node
            else:
                node = result[prefix]
            sub_prefix, node_details = node.map(store, key, value)
            if len(node_details) > 1:
                if prefix != sub_prefix:
                    # This node has been split and is now found via a different
                    # path
                    result.pop(prefix)
                new_node = InternalNode(sub_prefix,
                                        search_key_func=self._search_key_func)
                new_node.set_maximum_size(self._maximum_size)
                new_node._key_width = self._key_width
                for split, node in node_details:
                    new_node.add_node(split, node)
                result[prefix] = new_node
        return common_prefix, list(result.items())
    def map(self, store, key, value):
        """Map key to value."""

            if self._search_prefix is _unknown:
                raise AssertionError('%r must be known' % self._search_prefix)
            return self._search_prefix, [(b"", self)]

    _serialise_key = b'\x00'.join
    def serialise(self, store):
        """Serialise the LeafNode to store.

        :param store: A VersionedFiles honouring the CHK extensions.
        :return: An iterable of the keys inserted by this operation.
        """
        lines = [b"chkleaf:\n"]
        lines.append(b"%d\n" % self._maximum_size)
        lines.append(b"%d\n" % self._key_width)
        lines.append(b"%d\n" % self._len)
        if self._common_serialised_prefix is None:
            lines.append(b'\n')
            if len(self._items) != 0:
                raise AssertionError('If _common_serialised_prefix is None'
                                     ' we should have no items')
        else:
            lines.append(b'%s\n' % (self._common_serialised_prefix,))
            prefix_len = len(self._common_serialised_prefix)
        for key, value in sorted(self._items.items()):
            # Always add a final newline
            value_lines = osutils.chunks_to_lines([value + b'\n'])
            serialized = b"%s\x00%d\n" % (self._serialise_key(key),
                                          len(value_lines))
            if not serialized.startswith(self._common_serialised_prefix):
                raise AssertionError('We thought the common prefix was %r'
                                     ' but entry %r does not have it in common'
                                     % (self._common_serialised_prefix, serialized))
            lines.append(serialized[prefix_len:])
            lines.extend(value_lines)
        sha1, _, _ = store.add_lines((None,), (), lines)
        self._key = StaticTuple(b"sha1:" + sha1,).intern()
        data = b''.join(lines)
        if len(data) != self._current_size():
            raise AssertionError('Invalid _current_size')
        _get_cache()[self._key] = data
        return [self._key]
class InternalNode(Node):

    __slots__ = ('_node_width',)

    def __init__(self, prefix=b'', search_key_func=None):
        Node.__init__(self)
        # The size of an internalnode with default values and no children.
        # How many octets key prefixes within this node are.
            raise AssertionError("_search_prefix should not be None")
        if not prefix.startswith(self._search_prefix):
            raise AssertionError("prefixes mismatch: %s must start with %s"
                                 % (prefix, self._search_prefix))
        if len(prefix) != len(self._search_prefix) + 1:
            raise AssertionError("prefix wrong length: len(%s) is not %d" %
                                 (prefix, len(self._search_prefix) + 1))
        self._len += len(node)
        if not len(self._items):
            self._node_width = len(prefix)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                                 (self._node_width, len(self._search_prefix) + 1))
        self._items[prefix] = node
        self._key = None
    def _current_size(self):
        """Answer the current serialised size of this node."""
        return (self._raw_size + len(str(self._len)) + len(str(self._key_width))
                + len(str(self._maximum_size)))

    def deserialise(klass, bytes, key, search_key_func=None):
            # yielding all nodes, yield whatever we have, and queue up a read
            # for whatever we are missing
            shortcut = True
            for prefix, node in self._items.items():
                if node.__class__ is StaticTuple:
                    keys[node] = (prefix, None)
            for key in key_filter:
                search_prefix = self._search_prefix_filter(key)
                length_filter = length_filters.setdefault(
                    len(search_prefix), set())
                length_filter.add(search_prefix)
                prefix_to_keys.setdefault(search_prefix, []).append(key)

            if (self._node_width in length_filters and
                    len(length_filters) == 1):
                # all of the search prefixes match exactly _node_width. This
                # means that everything is an exact match, and we can do a
                # lookup into self._items, rather than iterating over the items
            else:
                # The slow way. We walk every item in self._items, and check to
                # see if there are any matches
                length_filters_itemview = length_filters.items()
                for prefix, node in self._items.items():
                    node_key_filter = []
                    for length, length_filter in length_filters_itemview:
                        sub_prefix = prefix[:length]
                        if sub_prefix in length_filter:
                            node_key_filter.extend(prefix_to_keys[sub_prefix])
                    if node_key_filter:  # this key matched something, yield it
                        if node.__class__ is StaticTuple:
                            keys[node] = (prefix, node_key_filter)

                    node = _deserialise(bytes, key,
                                        search_key_func=self._search_key_func)
                    prefix, node_key_filter = keys[key]
                    self._items[prefix] = node
                    found_keys.add(key)
                for record in stream:
                    bytes = record.get_bytes_as('fulltext')
                    node = _deserialise(bytes, record.key,
                                        search_key_func=self._search_key_func)
                    prefix, node_key_filter = keys[record.key]
                    node_and_filters.append((node, node_key_filter))
                    self._items[prefix] = node
                    _get_cache()[record.key] = bytes
                for info in node_and_filters:
                    yield info
        search_key = self._search_key(key)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                                 (self._node_width, len(self._search_prefix) + 1))
        if not search_key.startswith(self._search_prefix):
            # This key doesn't fit in this index, so we need to split at the
            # point where it would fit, insert self into that internal node,
            # and then map this key into that node.
            new_prefix = self.common_prefix(self._search_prefix,
                                            search_key)
            new_parent = InternalNode(new_prefix,
                                      search_key_func=self._search_key_func)
            new_parent.set_maximum_size(self._maximum_size)
            new_parent._key_width = self._key_width
            new_parent.add_node(self._search_prefix[:len(new_prefix) + 1],
                                self)
            return new_parent.map(store, key, value)
        children = [node for node, _ in self._iter_nodes(
            store, key_filter=[key])]
        if children:
            child = children[0]
        else:
            # new child needed:
            child = self._new_child(search_key, LeafNode)
        old_len = len(child)
        if isinstance(child, LeafNode):
            old_size = child._current_size()
        else:
            old_size = None
            self._items[search_key] = child
            self._key = None
            new_node = self
            if isinstance(child, LeafNode):
                if old_size is None:
                    # The old node was an InternalNode which means it has now
                    # collapsed, so we need to check if it will chain to a
                    # collapse at this level.
                    trace.mutter("checking remap as InternalNode -> LeafNode")
                    new_node = self._check_remap(store)
                else:
                    # amount is over a configurable limit.
                    new_size = child._current_size()
                    shrinkage = old_size - new_size
                    if (shrinkage > 0 and new_size < _INTERESTING_NEW_SIZE or
                            shrinkage > _INTERESTING_SHRINKAGE_LIMIT):
                        trace.mutter(
                            "checking remap as size shrunk by %d to be %d",
                            shrinkage, new_size)
                        new_node = self._check_remap(store)
            if new_node._search_prefix is None:
                raise AssertionError("_search_prefix should not be None")
            return new_node._search_prefix, [(b'', new_node)]
        # child has overflown - create a new intermediate node.
        # XXX: This is where we might want to try and expand our depth
        # to refer to more bytes of every child (which would give us
        # multiple pointers to child nodes, but less intermediate nodes)
        child = self._new_child(search_key, InternalNode)
        child._search_prefix = prefix
        for split, node in node_details:
            child.add_node(split, node)
        self._len = self._len - old_len + len(child)
        self._key = None
        return self._search_prefix, [(b"", self)]
    def _new_child(self, search_key, klass):
        """Create a new child node of type klass."""

    def serialise(self, store):
        """Serialise the node to store.

        :param store: A VersionedFiles honouring the CHK extensions.
        :return: An iterable of the keys inserted by this operation.
        """
        for node in self._items.values():
            if isinstance(node, StaticTuple):
                # Never deserialised.
                continue
            if node._key is not None:
                # Never serialised.
                continue
            for key in node.serialise(store):
                yield key
        lines = [b"chknode:\n"]
        lines.append(b"%d\n" % self._maximum_size)
        lines.append(b"%d\n" % self._key_width)
        lines.append(b"%d\n" % self._len)
        if self._search_prefix is None:
            raise AssertionError("_search_prefix should not be None")
        lines.append(b'%s\n' % (self._search_prefix,))
        prefix_len = len(self._search_prefix)
        for prefix, node in sorted(self._items.items()):
            if isinstance(node, StaticTuple):
                key = node[0]
            else:
                key = node._key[0]
            serialised = b"%s\x00%s\n" % (prefix, key)
            if not serialised.startswith(self._search_prefix):
                raise AssertionError("prefixes mismatch: %s must start with %s"
                                     % (serialised, self._search_prefix))
            lines.append(serialised[prefix_len:])
        sha1, _, _ = store.add_lines((None,), (), lines)
        self._key = StaticTuple(b"sha1:" + sha1,).intern()
        _get_cache()[self._key] = b''.join(lines)
        yield self._key
    def _search_key(self, key):
        """Return the serialised key for key in this node."""
        # search keys are fixed width. All will be self._node_width wide, so we
        # pad as necessary.
        return (self._search_key_func(key) + b'\x00' * self._node_width)[:self._node_width]
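    # For example (illustrative values): with _node_width == 4, a search key
    # b'ab' pads to b'ab\x00\x00' while b'abcdef' truncates to b'abcd', so
    # every prefix stored in this node has the same width.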
    def _search_prefix_filter(self, key):
        """Serialise key for use as a prefix filter in iteritems."""

        prefix for reaching node.
        """
        if offset >= self._node_width:
            for node in valueview(self._items):
                for result in node._split(offset):
                    yield result
        for key, node in self._items.items():
    def refs(self):
        """Return the references to other CHKs held by this node."""
        if self._key is None:
            raise AssertionError("unserialised nodes have no refs.")
        refs = []
        for value in self._items.values():
            if isinstance(value, StaticTuple):
                refs.append(value)
            else:
                refs.append(value.key())
        return refs

        return self._search_prefix
    def unmap(self, store, key, check_remap=True):
        """Remove key from this node and its children."""
        if not len(self._items):
            raise AssertionError("can't unmap in an empty InternalNode.")
        children = [node for node, _
                    in self._iter_nodes(store, key_filter=[key])]
        if children:
            child = children[0]

            self._items[search_key] = unmapped
        if len(self._items) == 1:
            # this node is no longer needed:
            return list(self._items.values())[0]
        if isinstance(unmapped, InternalNode):
            return self
        if check_remap:
            return self._check_remap(store)
        else:
            return self
        # c) With 255-way fan out, we don't want to read all 255 and destroy
        #    the page cache, just to determine that we really don't need it.
        for node, _ in self._iter_nodes(store, batch_size=16):
            if isinstance(node, InternalNode):
                # Without looking at any leaf nodes, we are sure
                return self
            for key, value in node._items.items():
                if new_leaf._map_no_split(key, value):
                    return self
        trace.mutter("remap generated a new LeafNode")
        return new_leaf
def _deserialise(data, key, search_key_func):
    """Helper for repositorydetails - convert bytes to a node."""
    if data.startswith(b"chkleaf:\n"):
        node = LeafNode.deserialise(data, key, search_key_func=search_key_func)
    elif data.startswith(b"chknode:\n"):
        node = InternalNode.deserialise(data, key,
                                        search_key_func=search_key_func)
    else:
        raise AssertionError("Unknown node type.")
    return node
            bytes = record.get_bytes_as('fulltext')
            node = _deserialise(bytes, record.key,
                                search_key_func=self._search_key_func)
            if isinstance(node, InternalNode):
                # Note we don't have to do node.refs() because we know that
                # there are no children that have been pushed into this node
                # Note: Using as_st() here seemed to save 1.2MB, which would
                #       indicate that we keep 100k prefix_refs around while
                #       processing. They *should* be shorter lived than that...
                #       It does cost us ~10s of processing time
                prefix_refs = list(node._items.items())
                items = []
            else:
                prefix_refs = []
                # Note: We don't use a StaticTuple here. Profiling showed a
                #       minor memory improvement (0.8MB out of 335MB peak 0.2%)
                #       But a significant slowdown (15s / 145s, or 10%)
                items = list(node._items.items())
            yield record, node, prefix_refs, items
    def _read_old_roots(self):
        old_chks_to_enqueue = []
        all_old_chks = self._all_old_chks
        for record, node, prefix_refs, items in \
                self._read_nodes_from_store(self._old_root_keys):
            # Uninteresting node
            prefix_refs = [p_r for p_r in prefix_refs
                           if p_r[1] not in all_old_chks]
            new_refs = [p_r[1] for p_r in prefix_refs]
            all_old_chks.update(new_refs)
            # TODO: This might be a good time to turn items into StaticTuple
        # handled the interesting ones
        for prefix, ref in old_chks_to_enqueue:
            not_interesting = True
            for i in range(len(prefix), 0, -1):
                if prefix[:i] in new_prefixes:
                    not_interesting = False
                    break
            # At this level, we now know all the uninteresting references
            # So we filter and queue up whatever is remaining
            prefix_refs = [p_r for p_r in prefix_refs
                           if p_r[1] not in self._all_old_chks and
                           p_r[1] not in processed_new_refs]
            refs = [p_r[1] for p_r in prefix_refs]
            new_prefixes.update([p_r[0] for p_r in prefix_refs])
            self._new_queue.extend(refs)
            # self._new_item_queue will hold the contents of multiple
            # records for an extended lifetime
            new_items = [item for item in items
                         if item not in self._all_old_items]
            self._new_item_queue.extend(new_items)
            new_prefixes.update([self._search_key_func(item[0])
                                 for item in new_items])
        # 'ab', then we also need to include 'a'.) So expand the
        # new_prefixes to include all shorter prefixes
        for prefix in list(new_prefixes):
            new_prefixes.update([prefix[:i] for i in range(1, len(prefix))])
        self._enqueue_old(new_prefixes, old_chks_to_enqueue)
    def _flush_new_queue(self):
        processed_new_refs = self._processed_new_refs
        all_old_items = self._all_old_items
        new_items = [item for item in self._new_item_queue
                     if item not in all_old_items]
        self._new_item_queue = []
        if new_items:
            yield None, new_items

        for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
            # TODO: Use StaticTuple here?
            self._all_old_items.update(items)
            refs = [r for _, r in prefix_refs if r not in all_old_chks]
            self._old_queue.extend(refs)
            all_old_chks.update(refs)
try:
    from ._chk_map_pyx import (
        _search_key_16,
        _search_key_255,
        _deserialise_leaf_node,
        _deserialise_internal_node,
        )
except ImportError as e:
    osutils.failed_to_load_extension(e)
    from ._chk_map_py import (
        _search_key_16,
        _search_key_255,
        _deserialise_leaf_node,
        _deserialise_internal_node,
        )
search_key_registry.register(b'hash-16-way', _search_key_16)
search_key_registry.register(b'hash-255-way', _search_key_255)
def _check_key(key):
    """Check a key for consistency.

    This generally shouldn't be used in production code, but it can be helpful
    to debug problems.
    """
    if not isinstance(key, StaticTuple):
        raise TypeError('key %r is not StaticTuple but %s' % (key, type(key)))
    if len(key) != 1:
        raise ValueError('key %r should have length 1, not %d' %
                         (key, len(key),))
    if not isinstance(key[0], str):
        raise TypeError('key %r should hold a str, not %r'
                        % (key, type(key[0])))
    if not key[0].startswith('sha1:'):
        raise ValueError('key %r should point to a sha1:' % (key,))
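# For example (illustrative): _check_key(StaticTuple('sha1:abcd',)) passes
# all checks, while _check_key(('sha1:abcd',)) raises TypeError because a
# plain tuple is not a StaticTuple.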