 from __future__ import absolute_import
-from bzrlib import lazy_import
 lazy_import.lazy_import(globals(), """
-from bzrlib.static_tuple import StaticTuple
+from ..sixish import (
+from ..static_tuple import StaticTuple

 # If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
 _INTERESTING_NEW_SIZE = 50
 # If a ChildNode shrinks by more than this amount, we check for a remap
 _INTERESTING_SHRINKAGE_LIMIT = 20
 # If we delete more than this many nodes applying a delta, we check for a remap
 _INTERESTING_DELETES_LIMIT = 5

 def _search_key_plain(key):
     """Map the key tuple into a search string that just uses the key bytes."""
-    return '\x00'.join(key)
+    return b'\x00'.join(key)

 search_key_registry = registry.Registry()
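As a small illustration (an editor's sketch, not part of the diff): keys are tuples of byte strings in the ported code, and the plain search key simply joins the key elements with NUL bytes; named search-key functions are looked up through the registry defined here.

    _search_key_plain((b'file-id', b'rev-id'))     # -> b'file-id\x00rev-id'
    # Named lookup through the registry (e.g. 'plain', 'hash-16-way' in bzrlib/breezy):
    search_key_func = search_key_registry.get('plain')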
             into the map; if old_key is not None, then the old mapping
             of old_key is removed.
         # Check preconditions first.
         as_st = StaticTuple.from_sequence
-        new_items = set([as_st(key) for (old, key, value) in delta
-                         if key is not None and old is None])
+        new_items = {as_st(key) for (old, key, value) in delta
+                     if key is not None and old is None}
         existing_new = list(self.iteritems(key_filter=new_items))
             raise errors.InconsistentDeltaDelta(delta,
         for old, new, value in delta:
             if old is not None and old != new:
                 self.unmap(old, check_remap=False)
         for old, new, value in delta:
             if new is not None:
                 self.map(new, value)
-        if delete_count > _INTERESTING_DELETES_LIMIT:
-            trace.mutter("checking remap as %d deletions", delete_count)
             self._check_remap()
         return self._save()
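For orientation, a minimal sketch of driving apply_delta (illustrative names, not part of the diff): each delta entry is an (old_key, new_key, new_value) tuple, and, per the lines above, removing more than _INTERESTING_DELETES_LIMIT entries triggers a _check_remap() before the final _save().

    delta = [
        (None, (b'new-id',), b'value for new-id'),        # insert: old_key is None
        ((b'mod-id',), (b'mod-id',), b'updated value'),   # update: same key, new value
        ((b'gone-id',), None, None),                      # delete: new_key is None
    ]
    new_root_key = chkmap.apply_delta(delta)   # assumes chkmap is a CHKMap; returns the new root key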
     def _ensure_root(self):
         """Ensure that the root node is an object not a key."""
-        if type(self._root_node) is StaticTuple:
+        if isinstance(self._root_node, StaticTuple):
             # Demand-load the root
             self._root_node = self._get_node(self._root_node)
         :param node: A tuple key or node object.
         :return: A node object.
-        if type(node) is StaticTuple:
+        if isinstance(node, StaticTuple):
             bytes = self._read_bytes(node)
             return _deserialise(bytes, node,
                 search_key_func=self._search_key_func)
         self._ensure_root()
         res = self._dump_tree_node(self._root_node, prefix='', indent='',
                                    include_keys=include_keys)
-        res.append('') # Give a trailing '\n'
-        return '\n'.join(res)
+        res.append(b'') # Give a trailing '\n'
+        return b'\n'.join(res)

     def _dump_tree_node(self, node, prefix, indent, include_keys=True):
         """For this node and all children, generate a string representation."""
             node_key = node.key()
             if node_key is not None:
-                key_str = ' %s' % (node_key[0],)
+                key_str = b' %s' % (node_key[0],)
-        result.append('%s%r %s%s' % (indent, prefix, node.__class__.__name__,
-        if type(node) is InternalNode:
+        result.append(b'%s%r %s%s' % (indent, prefix, node.__class__.__name__,
+        if isinstance(node, InternalNode):
             # Trigger all child nodes to get loaded
             list(node._iter_nodes(self._store))
-            for prefix, sub in sorted(node._items.iteritems()):
+            for prefix, sub in sorted(viewitems(node._items)):
                 result.extend(self._dump_tree_node(sub, prefix, indent + ' ',
                     include_keys=include_keys))
-            for key, value in sorted(node._items.iteritems()):
+            for key, value in sorted(viewitems(node._items)):
                 # Don't use prefix nor indent here to line up when used in
                 # tests in conjunction with assertEqualDiff
-                result.append(' %r %r' % (tuple(key), value))
+                result.append(b' %r %r' % (tuple(key), value))
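For readers who have not seen it, _dump_tree output (with include_keys=False) is shaped roughly like the sketch below; leaf items are deliberately printed without the tree indent so tests can compare them with assertEqualDiff:

    '' InternalNode
      'a' LeafNode
          ('aaa',) 'value for aaa'
      'b' LeafNode
          ('bbb',) 'value for bbb'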
         root_key = klass._create_directly(store, initial_value,
             maximum_size=maximum_size, key_width=key_width,
             search_key_func=search_key_func)
-        if type(root_key) is not StaticTuple:
+        if not isinstance(root_key, StaticTuple):
             raise AssertionError('we got a %s instead of a StaticTuple'
                 % (type(root_key),))
         result._root_node.set_maximum_size(maximum_size)
         result._root_node._key_width = key_width
-        for key, value in initial_value.items():
+        for key, value in viewitems(initial_value):
             delta.append((None, key, value))
         root_key = result.apply_delta(delta)
         node.set_maximum_size(maximum_size)
         node._key_width = key_width
         as_st = StaticTuple.from_sequence
-        node._items = dict([(as_st(key), val) for key, val
-                            in initial_value.iteritems()])
-        node._raw_size = sum([node._key_value_len(key, value)
-                              for key,value in node._items.iteritems()])
+        node._items = dict((as_st(key), val)
+                           for key, val in viewitems(initial_value))
+        node._raw_size = sum(node._key_value_len(key, value)
+                             for key, value in viewitems(node._items))
         node._len = len(node._items)
         node._compute_search_prefix()
         node._compute_serialised_prefix()
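As a usage sketch (illustrative, assuming store is a CHK-capable VersionedFiles): from_dict builds the initial tree through _create_directly and hands back the root key, which can then be used to reopen the map.

    root_key = CHKMap.from_dict(
        store,
        {(b'file-id',): b'file value', (b'other-id',): b'other value'},
        maximum_size=255,                       # leaf nodes split beyond this serialised size
        search_key_func=_search_key_plain)
    chkmap = CHKMap(store, root_key, search_key_func=_search_key_plain)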
         def process_node(node, path, a_map, pending):
             # take a node and expand it
             node = a_map._get_node(node)
-            if type(node) == LeafNode:
+            if isinstance(node, LeafNode):
                 path = (node._key, path)
-                for key, value in node._items.items():
+                for key, value in viewitems(node._items):
                     # For a LeafNode, the key is a serialized_key, rather than
                     # a search_key, but the heap is using search_keys
                     search_key = node._search_key_func(key)
                 # type(node) == InternalNode
                 path = (node._key, path)
-                for prefix, child in node._items.items():
+                for prefix, child in viewitems(node._items):
                     heapq.heappush(pending, (prefix, None, child, path))

         def process_common_internal_nodes(self_node, basis_node):
-            self_items = set(self_node._items.items())
-            basis_items = set(basis_node._items.items())
+            self_items = set(viewitems(self_node._items))
+            basis_items = set(viewitems(basis_node._items))
             path = (self_node._key, None)
             for prefix, child in self_items - basis_items:
                 heapq.heappush(self_pending, (prefix, None, child, path))
             for prefix, child in basis_items - self_items:
                 heapq.heappush(basis_pending, (prefix, None, child, path))

         def process_common_leaf_nodes(self_node, basis_node):
-            self_items = set(self_node._items.items())
-            basis_items = set(basis_node._items.items())
+            self_items = set(viewitems(self_node._items))
+            basis_items = set(viewitems(basis_node._items))
             path = (self_node._key, None)
             for key, value in self_items - basis_items:
                 prefix = self._search_key_func(key)
             self_node = self._get_node(self_node)
             basis_node = basis._get_node(basis_node)
-            if (type(self_node) == InternalNode
-                and type(basis_node) == InternalNode):
+            if (isinstance(self_node, InternalNode)
+                and isinstance(basis_node, InternalNode)):
                 # Matching internal nodes
                 process_common_internal_nodes(self_node, basis_node)
-            elif (type(self_node) == LeafNode
-                and type(basis_node) == LeafNode):
+            elif (isinstance(self_node, LeafNode)
+                and isinstance(basis_node, LeafNode)):
                 process_common_leaf_nodes(self_node, basis_node)
                 process_node(self_node, self_path, self, self_pending)
                 # A better implementation would probably have a reverse map
                 # back to the children of a node, and jump straight to it when
                 # a common node is detected, the proceed to remove the already
-                # pending children. bzrlib.graph has a searcher module with a
+                # pending children. breezy.graph has a searcher module with a
                 # similar problem.
                 while key_path is not None:
                     key, key_path = key_path
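These nested helpers feed CHKMap.iter_changes(basis), which walks both trees in search-key order and yields roughly (key, old_value, new_value) tuples; a minimal sketch of consuming it (illustrative names, not part of the diff):

    basis = CHKMap(store, old_root_key, search_key_func=_search_key_plain)
    current = CHKMap(store, new_root_key, search_key_func=_search_key_plain)
    for key, old_value, new_value in current.iter_changes(basis):
        if old_value is None:
            print('added', key, new_value)
        elif new_value is None:
            print('deleted', key, old_value)
        else:
            print('modified', key, old_value, new_value)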
     def _node_key(self, node):
         """Get the key for a node whether it's a tuple or node."""
-        if type(node) is tuple:
+        if isinstance(node, tuple):
             node = StaticTuple.from_sequence(node)
-        if type(node) is StaticTuple:
+        if isinstance(node, StaticTuple):
         """remove key from the map."""
         key = StaticTuple.from_sequence(key)
         self._ensure_root()
-        if type(self._root_node) is InternalNode:
+        if isinstance(self._root_node, InternalNode):
             unmapped = self._root_node.unmap(self._store, key,
                 check_remap=check_remap)

     def _check_remap(self):
         """Check if nodes can be collapsed."""
         self._ensure_root()
-        if type(self._root_node) is InternalNode:
-            self._root_node._check_remap(self._store)
+        if isinstance(self._root_node, InternalNode):
+            self._root_node = self._root_node._check_remap(self._store)
         """Save the map completely.
         :return: The key of the root node.
-        if type(self._root_node) is StaticTuple:
+        if isinstance(self._root_node, StaticTuple):
             return self._root_node
         keys = list(self._root_node.serialise(self._store))
                     # Short items, we need to match based on a prefix
-                    length_filter = filters.setdefault(len(key), set())
-                    length_filter.add(key)
+                    filters.setdefault(len(key), set()).add(key)
-                filters = filters.items()
-                for item in self._items.iteritems():
-                    for length, length_filter in filters:
+                filters_itemview = viewitems(filters)
+                for item in viewitems(self._items):
+                    for length, length_filter in filters_itemview:
                         if item[0][:length] in length_filter:
-            for item in self._items.iteritems():
+            for item in viewitems(self._items):

     def _key_value_len(self, key, value):
         # TODO: Should probably be done without actually joining the key, but
         # then that can be done via the C extension
         return (len(self._serialise_key(key)) + 1
-                + len(str(value.count('\n'))) + 1
+                + len(str(value.count(b'\n'))) + 1
                 + len(value) + 1)

     def _search_key(self, key):
         common_prefix = self._search_prefix
         split_at = len(common_prefix) + 1
-        for key, value in self._items.iteritems():
+        for key, value in viewitems(self._items):
             search_key = self._search_key(key)
             prefix = search_key[:split_at]
             # TODO: Generally only 1 key can be exactly the right length,
             # may get a '\00' node anywhere, but won't have keys of
             # different lengths.
             if len(prefix) < split_at:
-                prefix += '\x00'*(split_at - len(prefix))
+                prefix += b'\x00'*(split_at - len(prefix))
             if prefix not in result:
                 node = LeafNode(search_key_func=self._search_key_func)
                 node.set_maximum_size(self._maximum_size)
                 for split, node in node_details:
                     new_node.add_node(split, node)
                 result[prefix] = new_node
-        return common_prefix, result.items()
+        return common_prefix, list(viewitems(result))

     def map(self, store, key, value):
         """Map key to value."""
             raise AssertionError('%r must be known' % self._search_prefix)
         return self._search_prefix, [("", self)]

-    _serialise_key = '\x00'.join
+    _serialise_key = b'\x00'.join
     def serialise(self, store):
         """Serialise the LeafNode to store.
         :param store: A VersionedFiles honouring the CHK extensions.
         :return: An iterable of the keys inserted by this operation.
-        lines = ["chkleaf:\n"]
-        lines.append("%d\n" % self._maximum_size)
-        lines.append("%d\n" % self._key_width)
-        lines.append("%d\n" % self._len)
+        lines = [b"chkleaf:\n"]
+        lines.append(b"%d\n" % self._maximum_size)
+        lines.append(b"%d\n" % self._key_width)
+        lines.append(b"%d\n" % self._len)
         if self._common_serialised_prefix is None:
             if len(self._items) != 0:
                 raise AssertionError('If _common_serialised_prefix is None'
                     ' we should have no items')
-            lines.append('%s\n' % (self._common_serialised_prefix,))
+            lines.append(b'%s\n' % (self._common_serialised_prefix,))
             prefix_len = len(self._common_serialised_prefix)
-        for key, value in sorted(self._items.items()):
+        for key, value in sorted(viewitems(self._items)):
             # Always add a final newline
-            value_lines = osutils.chunks_to_lines([value + '\n'])
-            serialized = "%s\x00%s\n" % (self._serialise_key(key),
+            value_lines = osutils.chunks_to_lines([value + b'\n'])
+            serialized = b"%s\x00%d\n" % (self._serialise_key(key),
                 len(value_lines))
             if not serialized.startswith(self._common_serialised_prefix):
                 raise AssertionError('We thought the common prefix was %r'
             lines.append(serialized[prefix_len:])
             lines.extend(value_lines)
         sha1, _, _ = store.add_lines((None,), (), lines)
-        self._key = StaticTuple("sha1:" + sha1,).intern()
-        bytes = ''.join(lines)
-        if len(bytes) != self._current_size():
+        self._key = StaticTuple(b"sha1:" + sha1,).intern()
+        data = b''.join(lines)
+        if len(data) != self._current_size():
             raise AssertionError('Invalid _current_size')
-        _get_cache().add(self._key, bytes)
+        _get_cache()[self._key] = data
         return [self._key]
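Pieced together from the code above, the serialised leaf record has this shape (one record per leaf, stored under its sha1: key):

    chkleaf:
    <maximum_size>
    <key_width>
    <number of items>
    <common serialised prefix>
    <key suffix>\x00<number of value lines>
    <value line 1>
    ...                                (key/value block repeated per item)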
             raise AssertionError("_search_prefix should not be None")
         if not prefix.startswith(self._search_prefix):
             raise AssertionError("prefixes mismatch: %s must start with %s"
-                % (prefix,self._search_prefix))
+                % (prefix, self._search_prefix))
         if len(prefix) != len(self._search_prefix) + 1:
             raise AssertionError("prefix wrong length: len(%s) is not %d" %
                 (prefix, len(self._search_prefix) + 1))
             # yielding all nodes, yield whatever we have, and queue up a read
             # for whatever we are missing
             shortcut = True
-            for prefix, node in self._items.iteritems():
+            for prefix, node in viewitems(self._items):
                 if node.__class__ is StaticTuple:
                     keys[node] = (prefix, None)
                 # The slow way. We walk every item in self._items, and check to
                 # see if there are any matches
-                length_filters = length_filters.items()
-                for prefix, node in self._items.iteritems():
+                length_filters_itemview = viewitems(length_filters)
+                for prefix, node in viewitems(self._items):
                     node_key_filter = []
-                    for length, length_filter in length_filters:
+                    for length, length_filter in length_filters_itemview:
                         sub_prefix = prefix[:length]
                         if sub_prefix in length_filter:
                             node_key_filter.extend(prefix_to_keys[sub_prefix])
                 prefix, node_key_filter = keys[record.key]
                 node_and_filters.append((node, node_key_filter))
                 self._items[prefix] = node
-                _get_cache().add(record.key, bytes)
+                _get_cache()[record.key] = bytes
             for info in node_and_filters:
             self._items[search_key] = child
             self._key = None
             new_node = self
-            if type(child) is LeafNode:
+            if isinstance(child, LeafNode):
                 if old_size is None:
                     # The old node was an InternalNode which means it has now
                     # collapsed, so we need to check if it will chain to a
         :param store: A VersionedFiles honouring the CHK extensions.
         :return: An iterable of the keys inserted by this operation.
-        for node in self._items.itervalues():
-            if type(node) is StaticTuple:
+        for node in viewvalues(self._items):
+            if isinstance(node, StaticTuple):
                 # Never deserialised.
             if node._key is not None:
             for key in node.serialise(store):
-        lines = ["chknode:\n"]
-        lines.append("%d\n" % self._maximum_size)
-        lines.append("%d\n" % self._key_width)
-        lines.append("%d\n" % self._len)
+        lines = [b"chknode:\n"]
+        lines.append(b"%d\n" % self._maximum_size)
+        lines.append(b"%d\n" % self._key_width)
+        lines.append(b"%d\n" % self._len)
         if self._search_prefix is None:
             raise AssertionError("_search_prefix should not be None")
-        lines.append('%s\n' % (self._search_prefix,))
+        lines.append(b'%s\n' % (self._search_prefix,))
         prefix_len = len(self._search_prefix)
-        for prefix, node in sorted(self._items.items()):
-            if type(node) is StaticTuple:
+        for prefix, node in sorted(viewitems(self._items)):
+            if isinstance(node, StaticTuple):
                 key = node._key[0]
-            serialised = "%s\x00%s\n" % (prefix, key)
+            serialised = b"%s\x00%s\n" % (prefix, key)
             if not serialised.startswith(self._search_prefix):
                 raise AssertionError("prefixes mismatch: %s must start with %s"
                     % (serialised, self._search_prefix))
             lines.append(serialised[prefix_len:])
         sha1, _, _ = store.add_lines((None,), (), lines)
-        self._key = StaticTuple("sha1:" + sha1,).intern()
-        _get_cache().add(self._key, ''.join(lines))
+        self._key = StaticTuple(b"sha1:" + sha1,).intern()
+        _get_cache()[self._key] = b''.join(lines)
         yield self._key
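The internal-node record mirrors the leaf layout, with one child pointer per line (again pieced together from the code above):

    chknode:
    <maximum_size>
    <key_width>
    <total number of keys under this node>
    <search prefix>
    <prefix suffix>\x00<child sha1 key>
    ...                                (one line per child)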
     def _search_key(self, key):
         """Return the serialised key for key in this node."""
         # search keys are fixed width. All will be self._node_width wide, so we
         # pad as necessary.
-        return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width]
+        return (self._search_key_func(key) + b'\x00'*self._node_width)[:self._node_width]
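A worked example of the padding and truncation above, assuming _node_width is 4 and the plain search-key function:

    (b'ab' + b'\x00' * 4)[:4]       # short key  -> b'ab\x00\x00'
    (b'abcdef' + b'\x00' * 4)[:4]   # long key   -> b'abcd'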
     def _search_prefix_filter(self, key):
         """Serialise key for use as a prefix filter in iteritems."""
             prefix for reaching node.
         if offset >= self._node_width:
-            for node in self._items.values():
+            for node in viewvalues(self._items):
                 for result in node._split(offset):
-        for key, node in self._items.items():

     def refs(self):
         """Return the references to other CHK's held by this node."""
         if self._key is None:
             raise AssertionError("unserialised nodes have no refs.")
-        for value in self._items.itervalues():
-            if type(value) is StaticTuple:
+        for value in viewvalues(self._items):
+            if isinstance(value, StaticTuple):
                 refs.append(value)
                 refs.append(value.key())
         return self._search_prefix

     def unmap(self, store, key, check_remap=True):
-        """Remove key from this node and it's children."""
+        """Remove key from this node and its children."""
         if not len(self._items):
             raise AssertionError("can't unmap in an empty InternalNode.")
         children = [node for node, _
         self._items[search_key] = unmapped
         if len(self._items) == 1:
             # this node is no longer needed:
-            return self._items.values()[0]
-        if type(unmapped) is InternalNode:
+            return list(viewvalues(self._items))[0]
+        if isinstance(unmapped, InternalNode):
         if check_remap:
             return self._check_remap(store)
         # c) With 255-way fan out, we don't want to read all 255 and destroy
         # the page cache, just to determine that we really don't need it.
         for node, _ in self._iter_nodes(store, batch_size=16):
-            if type(node) is InternalNode:
+            if isinstance(node, InternalNode):
                 # Without looking at any leaf nodes, we are sure
-            for key, value in node._items.iteritems():
+            for key, value in viewitems(node._items):
                 if new_leaf._map_no_split(key, value):
         trace.mutter("remap generated a new LeafNode")
         return new_leaf
-def _deserialise(bytes, key, search_key_func):
+def _deserialise(data, key, search_key_func):
     """Helper for repositorydetails - convert bytes to a node."""
-    if bytes.startswith("chkleaf:\n"):
-        node = LeafNode.deserialise(bytes, key, search_key_func=search_key_func)
-    elif bytes.startswith("chknode:\n"):
-        node = InternalNode.deserialise(bytes, key,
+    if data.startswith(b"chkleaf:\n"):
+        node = LeafNode.deserialise(data, key, search_key_func=search_key_func)
+    elif data.startswith(b"chknode:\n"):
+        node = InternalNode.deserialise(data, key,
             search_key_func=search_key_func)
         raise AssertionError("Unknown node type.")
             bytes = record.get_bytes_as('fulltext')
             node = _deserialise(bytes, record.key,
                 search_key_func=self._search_key_func)
-            if type(node) is InternalNode:
+            if isinstance(node, InternalNode):
                 # Note we don't have to do node.refs() because we know that
                 # there are no children that have been pushed into this node
                 # Note: Using as_st() here seemed to save 1.2MB, which would
                 # indicate that we keep 100k prefix_refs around while
                 # processing. They *should* be shorter lived than that...
                 # It does cost us ~10s of processing time
-                #prefix_refs = [as_st(item) for item in node._items.iteritems()]
-                prefix_refs = node._items.items()
+                prefix_refs = list(viewitems(node._items))
                 prefix_refs = []
                 # Note: We don't use a StaticTuple here. Profiling showed a
                 # minor memory improvement (0.8MB out of 335MB peak 0.2%)
                 # But a significant slowdown (15s / 145s, or 10%)
-                items = node._items.items()
+                items = list(viewitems(node._items))
             yield record, node, prefix_refs, items

     def _read_old_roots(self):
         # handled the interesting ones
         for prefix, ref in old_chks_to_enqueue:
             not_interesting = True
-            for i in xrange(len(prefix), 0, -1):
+            for i in range(len(prefix), 0, -1):
                 if prefix[:i] in new_prefixes:
                     not_interesting = False
         # 'ab', then we also need to include 'a'.) So expand the
         # new_prefixes to include all shorter prefixes
         for prefix in list(new_prefixes):
-            new_prefixes.update([prefix[:i] for i in xrange(1, len(prefix))])
+            new_prefixes.update([prefix[:i] for i in range(1, len(prefix))])
         self._enqueue_old(new_prefixes, old_chks_to_enqueue)

     def _flush_new_queue(self):
         for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
             # TODO: Use StaticTuple here?
             self._all_old_items.update(items)
-            refs = [r for _,r in prefix_refs if r not in all_old_chks]
+            refs = [r for _, r in prefix_refs if r not in all_old_chks]
             self._old_queue.extend(refs)
             all_old_chks.update(refs)
-    from bzrlib._chk_map_pyx import (
+    from ._chk_map_pyx import (
         _search_key_16,
         _search_key_255,
         _deserialise_leaf_node,
         _deserialise_internal_node,
-except ImportError, e:
+except ImportError as e:
     osutils.failed_to_load_extension(e)
-    from bzrlib._chk_map_py import (
+    from ._chk_map_py import (
         _search_key_16,
         _search_key_255,
         _deserialise_leaf_node,
     This generally shouldn't be used in production code, but it can be helpful
     to debug problems.
-    if type(key) is not StaticTuple:
+    if not isinstance(key, StaticTuple):
         raise TypeError('key %r is not StaticTuple but %s' % (key, type(key)))
     if len(key) != 1:
         raise ValueError('key %r should have length 1, not %d' % (key, len(key),))
-    if type(key[0]) is not str:
+    if not isinstance(key[0], str):
         raise TypeError('key %r should hold a str, not %r'
             % (key, type(key[0])))
     if not key[0].startswith('sha1:'):