136
138
has_deletes = False
137
139
# Check preconditions first.
138
140
as_st = StaticTuple.from_sequence
139
new_items = {as_st(key) for (old, key, value) in delta
140
if key is not None and old is None}
141
new_items = set([as_st(key) for (old, key, value) in delta
142
if key is not None and old is None])
141
143
existing_new = list(self.iteritems(key_filter=new_items))
143
145
raise errors.InconsistentDeltaDelta(delta,
170
172
:param node: A tuple key or node object.
171
173
:return: A node object.
173
if isinstance(node, StaticTuple):
175
if type(node) is StaticTuple:
174
176
bytes = self._read_bytes(node)
175
177
return _deserialise(bytes, node,
176
178
search_key_func=self._search_key_func)
191
193
self._ensure_root()
192
194
res = self._dump_tree_node(self._root_node, prefix='', indent='',
193
195
include_keys=include_keys)
194
res.append(b'') # Give a trailing '\n'
195
return b'\n'.join(res)
196
res.append('') # Give a trailing '\n'
197
return '\n'.join(res)
197
199
def _dump_tree_node(self, node, prefix, indent, include_keys=True):
198
200
"""For this node and all children, generate a string representation."""
203
205
node_key = node.key()
204
206
if node_key is not None:
205
key_str = b' %s' % (node_key[0],)
207
key_str = ' %s' % (node_key[0],)
208
result.append(b'%s%r %s%s' % (indent, prefix, node.__class__.__name__,
210
if isinstance(node, InternalNode):
210
result.append('%s%r %s%s' % (indent, prefix, node.__class__.__name__,
212
if type(node) is InternalNode:
211
213
# Trigger all child nodes to get loaded
212
214
list(node._iter_nodes(self._store))
213
for prefix, sub in sorted(viewitems(node._items)):
215
for prefix, sub in sorted(node._items.iteritems()):
214
216
result.extend(self._dump_tree_node(sub, prefix, indent + ' ',
215
217
include_keys=include_keys))
217
for key, value in sorted(viewitems(node._items)):
219
for key, value in sorted(node._items.iteritems()):
218
220
# Don't use prefix nor indent here to line up when used in
219
221
# tests in conjunction with assertEqualDiff
220
result.append(b' %r %r' % (tuple(key), value))
222
result.append(' %r %r' % (tuple(key), value))
241
243
root_key = klass._create_directly(store, initial_value,
242
244
maximum_size=maximum_size, key_width=key_width,
243
245
search_key_func=search_key_func)
244
if not isinstance(root_key, StaticTuple):
246
if type(root_key) is not StaticTuple:
245
247
raise AssertionError('we got a %s instead of a StaticTuple'
246
248
% (type(root_key),))
253
255
result._root_node.set_maximum_size(maximum_size)
254
256
result._root_node._key_width = key_width
256
for key, value in viewitems(initial_value):
258
for key, value in initial_value.items():
257
259
delta.append((None, key, value))
258
260
root_key = result.apply_delta(delta)
265
267
node.set_maximum_size(maximum_size)
266
268
node._key_width = key_width
267
269
as_st = StaticTuple.from_sequence
268
node._items = dict((as_st(key), val)
269
for key, val in viewitems(initial_value))
270
node._raw_size = sum(node._key_value_len(key, value)
271
for key, value in viewitems(node._items))
270
node._items = dict([(as_st(key), val) for key, val
271
in initial_value.iteritems()])
272
node._raw_size = sum([node._key_value_len(key, value)
273
for key,value in node._items.iteritems()])
272
274
node._len = len(node._items)
273
275
node._compute_search_prefix()
274
276
node._compute_serialised_prefix()
329
331
def process_node(node, path, a_map, pending):
330
332
# take a node and expand it
331
333
node = a_map._get_node(node)
332
if isinstance(node, LeafNode):
334
if type(node) == LeafNode:
333
335
path = (node._key, path)
334
for key, value in viewitems(node._items):
336
for key, value in node._items.items():
335
337
# For a LeafNode, the key is a serialized_key, rather than
336
338
# a search_key, but the heap is using search_keys
337
339
search_key = node._search_key_func(key)
340
342
# type(node) == InternalNode
341
343
path = (node._key, path)
342
for prefix, child in viewitems(node._items):
344
for prefix, child in node._items.items():
343
345
heapq.heappush(pending, (prefix, None, child, path))
344
346
def process_common_internal_nodes(self_node, basis_node):
345
self_items = set(viewitems(self_node._items))
346
basis_items = set(viewitems(basis_node._items))
347
self_items = set(self_node._items.items())
348
basis_items = set(basis_node._items.items())
347
349
path = (self_node._key, None)
348
350
for prefix, child in self_items - basis_items:
349
351
heapq.heappush(self_pending, (prefix, None, child, path))
351
353
for prefix, child in basis_items - self_items:
352
354
heapq.heappush(basis_pending, (prefix, None, child, path))
353
355
def process_common_leaf_nodes(self_node, basis_node):
354
self_items = set(viewitems(self_node._items))
355
basis_items = set(viewitems(basis_node._items))
356
self_items = set(self_node._items.items())
357
basis_items = set(basis_node._items.items())
356
358
path = (self_node._key, None)
357
359
for key, value in self_items - basis_items:
358
360
prefix = self._search_key_func(key)
368
370
self_node = self._get_node(self_node)
369
371
basis_node = basis._get_node(basis_node)
370
if (isinstance(self_node, InternalNode)
371
and isinstance(basis_node, InternalNode)):
372
if (type(self_node) == InternalNode
373
and type(basis_node) == InternalNode):
372
374
# Matching internal nodes
373
375
process_common_internal_nodes(self_node, basis_node)
374
elif (isinstance(self_node, LeafNode)
375
and isinstance(basis_node, LeafNode)):
376
elif (type(self_node) == LeafNode
377
and type(basis_node) == LeafNode):
376
378
process_common_leaf_nodes(self_node, basis_node)
378
380
process_node(self_node, self_path, self, self_pending)
387
389
# A better implementation would probably have a reverse map
388
390
# back to the children of a node, and jump straight to it when
389
391
# a common node is detected, the proceed to remove the already
390
# pending children. breezy.graph has a searcher module with a
392
# pending children. bzrlib.graph has a searcher module with a
391
393
# similar problem.
392
394
while key_path is not None:
393
395
key, key_path = key_path
549
551
def _node_key(self, node):
550
552
"""Get the key for a node whether it's a tuple or node."""
551
if isinstance(node, tuple):
553
if type(node) is tuple:
552
554
node = StaticTuple.from_sequence(node)
553
if isinstance(node, StaticTuple):
555
if type(node) is StaticTuple:
559
561
"""remove key from the map."""
560
562
key = StaticTuple.from_sequence(key)
561
563
self._ensure_root()
562
if isinstance(self._root_node, InternalNode):
564
if type(self._root_node) is InternalNode:
563
565
unmapped = self._root_node.unmap(self._store, key,
564
566
check_remap=check_remap)
766
768
# Short items, we need to match based on a prefix
767
filters.setdefault(len(key), set()).add(key)
769
length_filter = filters.setdefault(len(key), set())
770
length_filter.add(key)
769
filters_itemview = viewitems(filters)
770
for item in viewitems(self._items):
771
for length, length_filter in filters_itemview:
772
filters = filters.items()
773
for item in self._items.iteritems():
774
for length, length_filter in filters:
772
775
if item[0][:length] in length_filter:
776
for item in viewitems(self._items):
779
for item in self._items.iteritems():
779
782
def _key_value_len(self, key, value):
780
783
# TODO: Should probably be done without actually joining the key, but
781
784
# then that can be done via the C extension
782
785
return (len(self._serialise_key(key)) + 1
783
+ len(str(value.count(b'\n'))) + 1
786
+ len(str(value.count('\n'))) + 1
784
787
+ len(value) + 1)
786
789
def _search_key(self, key):
835
838
common_prefix = self._search_prefix
836
839
split_at = len(common_prefix) + 1
838
for key, value in viewitems(self._items):
841
for key, value in self._items.iteritems():
839
842
search_key = self._search_key(key)
840
843
prefix = search_key[:split_at]
841
844
# TODO: Generally only 1 key can be exactly the right length,
847
850
# may get a '\00' node anywhere, but won't have keys of
848
851
# different lengths.
849
852
if len(prefix) < split_at:
850
prefix += b'\x00'*(split_at - len(prefix))
853
prefix += '\x00'*(split_at - len(prefix))
851
854
if prefix not in result:
852
855
node = LeafNode(search_key_func=self._search_key_func)
853
856
node.set_maximum_size(self._maximum_size)
868
871
for split, node in node_details:
869
872
new_node.add_node(split, node)
870
873
result[prefix] = new_node
871
return common_prefix, list(viewitems(result))
874
return common_prefix, result.items()
873
876
def map(self, store, key, value):
874
877
"""Map key to value."""
883
886
raise AssertionError('%r must be known' % self._search_prefix)
884
887
return self._search_prefix, [("", self)]
886
_serialise_key = b'\x00'.join
889
_serialise_key = '\x00'.join
888
891
def serialise(self, store):
889
892
"""Serialise the LeafNode to store.
891
894
:param store: A VersionedFiles honouring the CHK extensions.
892
895
:return: An iterable of the keys inserted by this operation.
894
lines = [b"chkleaf:\n"]
895
lines.append(b"%d\n" % self._maximum_size)
896
lines.append(b"%d\n" % self._key_width)
897
lines.append(b"%d\n" % self._len)
897
lines = ["chkleaf:\n"]
898
lines.append("%d\n" % self._maximum_size)
899
lines.append("%d\n" % self._key_width)
900
lines.append("%d\n" % self._len)
898
901
if self._common_serialised_prefix is None:
900
903
if len(self._items) != 0:
901
904
raise AssertionError('If _common_serialised_prefix is None'
902
905
' we should have no items')
904
lines.append(b'%s\n' % (self._common_serialised_prefix,))
907
lines.append('%s\n' % (self._common_serialised_prefix,))
905
908
prefix_len = len(self._common_serialised_prefix)
906
for key, value in sorted(viewitems(self._items)):
909
for key, value in sorted(self._items.items()):
907
910
# Always add a final newline
908
value_lines = osutils.chunks_to_lines([value + b'\n'])
909
serialized = b"%s\x00%d\n" % (self._serialise_key(key),
911
value_lines = osutils.chunks_to_lines([value + '\n'])
912
serialized = "%s\x00%s\n" % (self._serialise_key(key),
910
913
len(value_lines))
911
914
if not serialized.startswith(self._common_serialised_prefix):
912
915
raise AssertionError('We thought the common prefix was %r'
915
918
lines.append(serialized[prefix_len:])
916
919
lines.extend(value_lines)
917
920
sha1, _, _ = store.add_lines((None,), (), lines)
918
self._key = StaticTuple(b"sha1:" + sha1,).intern()
919
data = b''.join(lines)
920
if len(data) != self._current_size():
921
self._key = StaticTuple("sha1:" + sha1,).intern()
922
bytes = ''.join(lines)
923
if len(bytes) != self._current_size():
921
924
raise AssertionError('Invalid _current_size')
922
_get_cache()[self._key] = data
925
_get_cache()[self._key] = bytes
923
926
return [self._key]
1012
1015
raise AssertionError("_search_prefix should not be None")
1013
1016
if not prefix.startswith(self._search_prefix):
1014
1017
raise AssertionError("prefixes mismatch: %s must start with %s"
1015
% (prefix, self._search_prefix))
1018
% (prefix,self._search_prefix))
1016
1019
if len(prefix) != len(self._search_prefix) + 1:
1017
1020
raise AssertionError("prefix wrong length: len(%s) is not %d" %
1018
1021
(prefix, len(self._search_prefix) + 1))
1068
1071
# yielding all nodes, yield whatever we have, and queue up a read
1069
1072
# for whatever we are missing
1070
1073
shortcut = True
1071
for prefix, node in viewitems(self._items):
1074
for prefix, node in self._items.iteritems():
1072
1075
if node.__class__ is StaticTuple:
1073
1076
keys[node] = (prefix, None)
1145
1148
# The slow way. We walk every item in self._items, and check to
1146
1149
# see if there are any matches
1147
length_filters_itemview = viewitems(length_filters)
1148
for prefix, node in viewitems(self._items):
1150
length_filters = length_filters.items()
1151
for prefix, node in self._items.iteritems():
1149
1152
node_key_filter = []
1150
for length, length_filter in length_filters_itemview:
1153
for length, length_filter in length_filters:
1151
1154
sub_prefix = prefix[:length]
1152
1155
if sub_prefix in length_filter:
1153
1156
node_key_filter.extend(prefix_to_keys[sub_prefix])
1237
1240
self._items[search_key] = child
1238
1241
self._key = None
1239
1242
new_node = self
1240
if isinstance(child, LeafNode):
1243
if type(child) is LeafNode:
1241
1244
if old_size is None:
1242
1245
# The old node was an InternalNode which means it has now
1243
1246
# collapsed, so we need to check if it will chain to a
1289
1292
:param store: A VersionedFiles honouring the CHK extensions.
1290
1293
:return: An iterable of the keys inserted by this operation.
1292
for node in viewvalues(self._items):
1293
if isinstance(node, StaticTuple):
1295
for node in self._items.itervalues():
1296
if type(node) is StaticTuple:
1294
1297
# Never deserialised.
1296
1299
if node._key is not None:
1299
1302
for key in node.serialise(store):
1301
lines = [b"chknode:\n"]
1302
lines.append(b"%d\n" % self._maximum_size)
1303
lines.append(b"%d\n" % self._key_width)
1304
lines.append(b"%d\n" % self._len)
1304
lines = ["chknode:\n"]
1305
lines.append("%d\n" % self._maximum_size)
1306
lines.append("%d\n" % self._key_width)
1307
lines.append("%d\n" % self._len)
1305
1308
if self._search_prefix is None:
1306
1309
raise AssertionError("_search_prefix should not be None")
1307
lines.append(b'%s\n' % (self._search_prefix,))
1310
lines.append('%s\n' % (self._search_prefix,))
1308
1311
prefix_len = len(self._search_prefix)
1309
for prefix, node in sorted(viewitems(self._items)):
1310
if isinstance(node, StaticTuple):
1312
for prefix, node in sorted(self._items.items()):
1313
if type(node) is StaticTuple:
1313
1316
key = node._key[0]
1314
serialised = b"%s\x00%s\n" % (prefix, key)
1317
serialised = "%s\x00%s\n" % (prefix, key)
1315
1318
if not serialised.startswith(self._search_prefix):
1316
1319
raise AssertionError("prefixes mismatch: %s must start with %s"
1317
1320
% (serialised, self._search_prefix))
1318
1321
lines.append(serialised[prefix_len:])
1319
1322
sha1, _, _ = store.add_lines((None,), (), lines)
1320
self._key = StaticTuple(b"sha1:" + sha1,).intern()
1321
_get_cache()[self._key] = b''.join(lines)
1323
self._key = StaticTuple("sha1:" + sha1,).intern()
1324
_get_cache()[self._key] = ''.join(lines)
1322
1325
yield self._key
1324
1327
def _search_key(self, key):
1325
1328
"""Return the serialised key for key in this node."""
1326
1329
# search keys are fixed width. All will be self._node_width wide, so we
1327
1330
# pad as necessary.
1328
return (self._search_key_func(key) + b'\x00'*self._node_width)[:self._node_width]
1331
return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width]
1330
1333
def _search_prefix_filter(self, key):
1331
1334
"""Serialise key for use as a prefix filter in iteritems."""
1339
1342
prefix for reaching node.
1341
1344
if offset >= self._node_width:
1342
for node in valueview(self._items):
1345
for node in self._items.values():
1343
1346
for result in node._split(offset):
1349
for key, node in self._items.items():
1346
1352
def refs(self):
1347
1353
"""Return the references to other CHK's held by this node."""
1348
1354
if self._key is None:
1349
1355
raise AssertionError("unserialised nodes have no refs.")
1351
for value in viewvalues(self._items):
1352
if isinstance(value, StaticTuple):
1357
for value in self._items.itervalues():
1358
if type(value) is StaticTuple:
1353
1359
refs.append(value)
1355
1361
refs.append(value.key())
1387
1393
self._items[search_key] = unmapped
1388
1394
if len(self._items) == 1:
1389
1395
# this node is no longer needed:
1390
return list(viewvalues(self._items))[0]
1391
if isinstance(unmapped, InternalNode):
1396
return self._items.values()[0]
1397
if type(unmapped) is InternalNode:
1393
1399
if check_remap:
1394
1400
return self._check_remap(store)
1434
1440
# c) With 255-way fan out, we don't want to read all 255 and destroy
1435
1441
# the page cache, just to determine that we really don't need it.
1436
1442
for node, _ in self._iter_nodes(store, batch_size=16):
1437
if isinstance(node, InternalNode):
1443
if type(node) is InternalNode:
1438
1444
# Without looking at any leaf nodes, we are sure
1440
for key, value in viewitems(node._items):
1446
for key, value in node._items.iteritems():
1441
1447
if new_leaf._map_no_split(key, value):
1443
1449
trace.mutter("remap generated a new LeafNode")
1444
1450
return new_leaf
1447
def _deserialise(data, key, search_key_func):
1453
def _deserialise(bytes, key, search_key_func):
1448
1454
"""Helper for repositorydetails - convert bytes to a node."""
1449
if data.startswith(b"chkleaf:\n"):
1450
node = LeafNode.deserialise(data, key, search_key_func=search_key_func)
1451
elif data.startswith(b"chknode:\n"):
1452
node = InternalNode.deserialise(data, key,
1455
if bytes.startswith("chkleaf:\n"):
1456
node = LeafNode.deserialise(bytes, key, search_key_func=search_key_func)
1457
elif bytes.startswith("chknode:\n"):
1458
node = InternalNode.deserialise(bytes, key,
1453
1459
search_key_func=search_key_func)
1455
1461
raise AssertionError("Unknown node type.")
1519
1525
bytes = record.get_bytes_as('fulltext')
1520
1526
node = _deserialise(bytes, record.key,
1521
1527
search_key_func=self._search_key_func)
1522
if isinstance(node, InternalNode):
1528
if type(node) is InternalNode:
1523
1529
# Note we don't have to do node.refs() because we know that
1524
1530
# there are no children that have been pushed into this node
1525
1531
# Note: Using as_st() here seemed to save 1.2MB, which would
1526
1532
# indicate that we keep 100k prefix_refs around while
1527
1533
# processing. They *should* be shorter lived than that...
1528
1534
# It does cost us ~10s of processing time
1529
prefix_refs = list(viewitems(node._items))
1535
#prefix_refs = [as_st(item) for item in node._items.iteritems()]
1536
prefix_refs = node._items.items()
1532
1539
prefix_refs = []
1533
1540
# Note: We don't use a StaticTuple here. Profiling showed a
1534
1541
# minor memory improvement (0.8MB out of 335MB peak 0.2%)
1535
1542
# But a significant slowdown (15s / 145s, or 10%)
1536
items = list(viewitems(node._items))
1543
items = node._items.items()
1537
1544
yield record, node, prefix_refs, items
1539
1546
def _read_old_roots(self):
1563
1570
# handled the interesting ones
1564
1571
for prefix, ref in old_chks_to_enqueue:
1565
1572
not_interesting = True
1566
for i in range(len(prefix), 0, -1):
1573
for i in xrange(len(prefix), 0, -1):
1567
1574
if prefix[:i] in new_prefixes:
1568
1575
not_interesting = False
1623
1630
# 'ab', then we also need to include 'a'.) So expand the
1624
1631
# new_prefixes to include all shorter prefixes
1625
1632
for prefix in list(new_prefixes):
1626
new_prefixes.update([prefix[:i] for i in range(1, len(prefix))])
1633
new_prefixes.update([prefix[:i] for i in xrange(1, len(prefix))])
1627
1634
self._enqueue_old(new_prefixes, old_chks_to_enqueue)
1629
1636
def _flush_new_queue(self):
1680
1687
for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
1681
1688
# TODO: Use StaticTuple here?
1682
1689
self._all_old_items.update(items)
1683
refs = [r for _, r in prefix_refs if r not in all_old_chks]
1690
refs = [r for _,r in prefix_refs if r not in all_old_chks]
1684
1691
self._old_queue.extend(refs)
1685
1692
all_old_chks.update(refs)
1721
from ._chk_map_pyx import (
1728
from bzrlib._chk_map_pyx import (
1722
1729
_bytes_to_text_key,
1723
1730
_search_key_16,
1724
1731
_search_key_255,
1725
1732
_deserialise_leaf_node,
1726
1733
_deserialise_internal_node,
1728
except ImportError as e:
1735
except ImportError, e:
1729
1736
osutils.failed_to_load_extension(e)
1730
from ._chk_map_py import (
1737
from bzrlib._chk_map_py import (
1731
1738
_bytes_to_text_key,
1732
1739
_search_key_16,
1733
1740
_search_key_255,
1744
1751
This generally shouldn't be used in production code, but it can be helpful
1745
1752
to debug problems.
1747
if not isinstance(key, StaticTuple):
1754
if type(key) is not StaticTuple:
1748
1755
raise TypeError('key %r is not StaticTuple but %s' % (key, type(key)))
1749
1756
if len(key) != 1:
1750
1757
raise ValueError('key %r should have length 1, not %d' % (key, len(key),))
1751
if not isinstance(key[0], str):
1758
if type(key[0]) is not str:
1752
1759
raise TypeError('key %r should hold a str, not %r'
1753
1760
% (key, type(key[0])))
1754
1761
if not key[0].startswith('sha1:'):