        :return: The root chk of the resulting CHKMap.
        """
        root_key = klass._create_directly(store, initial_value,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        return root_key

    @classmethod
    def _create_via_map(klass, store, initial_value, maximum_size=0,
                        key_width=1, search_key_func=None):
        result = klass(store, None, search_key_func=search_key_func)
        result._root_node.set_maximum_size(maximum_size)
        result._root_node._key_width = key_width
        delta = []
        for key, value in initial_value.items():
            delta.append((None, key, value))
        root_key = result.apply_delta(delta)
        return root_key
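
    # --- Illustrative sketch, not part of bzrlib ---------------------------
    # How a caller might use from_dict, based on the signatures above.
    # 'store' is assumed to be any VersionedFiles-like object that CHKMap
    # accepts; tuple keys and bytestring values follow from_dict's contract.
    # Hypothetical helper, shown only to make the API concrete.
    @staticmethod
    def _example_from_dict_usage(store):
        root_key = CHKMap.from_dict(store, {('name',): 'contents'},
                                    maximum_size=4096, key_width=1)
        # The returned root chk can be used to reopen the map later.
        return CHKMap(store, root_key)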
    @classmethod
    def _create_directly(klass, store, initial_value, maximum_size=0,
                         key_width=1, search_key_func=None):
        node = LeafNode(search_key_func=search_key_func)
        node.set_maximum_size(maximum_size)
        node._key_width = key_width
        node._items = dict(initial_value)
        node._raw_size = sum([node._key_value_len(key, value)
                              for key, value in initial_value.iteritems()])
        node._len = len(node._items)
        node._compute_search_prefix()
        node._compute_serialised_prefix()
        if (node._len > 1
            and maximum_size
            and node._current_size() > maximum_size):
            # The single node overflowed: split it into a layer of LeafNodes
            # referenced by one InternalNode.
            prefix, node_details = node._split(store)
            if len(node_details) == 1:
                raise AssertionError('Failed to split using node._split')
            node = InternalNode(prefix, search_key_func=search_key_func)
            node.set_maximum_size(maximum_size)
            node._key_width = key_width
            for split, subnode in node_details:
                node.add_node(split, subnode)
        keys = list(node.serialise(store))
        return keys[-1]
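
    # --- Illustrative sketch, not part of bzrlib ---------------------------
    # The two construction strategies compared. _create_via_map replays the
    # dict through apply_delta (one map() call per key, splitting nodes as it
    # goes), while _create_directly fills a single oversized LeafNode and
    # splits once at the end; both are expected to produce the same root chk.
    # Hypothetical helper for illustration only.
    @classmethod
    def _example_check_creation_strategies(klass, store, initial_value):
        direct_key = klass._create_directly(store, initial_value)
        via_map_key = klass._create_via_map(store, initial_value)
        if direct_key != via_map_key:
            raise AssertionError('creation strategies disagree on root chk')
        return direct_key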
    def iter_changes(self, basis):
        """Iterate over the changes between basis and self.
    # (excerpt resumes inside LeafNode._split)
                result[prefix] = node
            else:
                node = result[prefix]
            sub_prefix, node_details = node.map(store, key, value)
            if len(node_details) > 1:
                if prefix != sub_prefix:
                    # This node has been split and is now found via a different
                    # path
                    result.pop(prefix)
                new_node = InternalNode(sub_prefix,
                    search_key_func=self._search_key_func)
                new_node.set_maximum_size(self._maximum_size)
                new_node._key_width = self._key_width
                for split, node in node_details:
                    new_node.add_node(split, node)
                result[prefix] = new_node
        return common_prefix, result.items()
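
    # --- Illustrative sketch, not part of bzrlib ---------------------------
    # The regrouping idea from _split in isolation: bucket search keys by one
    # character past the common prefix, padding short keys with '\x00' so all
    # bucket names share the same width (the padding mirrors the elided first
    # half of _split and is an assumption here). Hypothetical helper.
    @staticmethod
    def _example_bucket_by_prefix(search_keys, common_prefix):
        split_at = len(common_prefix) + 1
        buckets = {}
        for search_key in search_keys:
            prefix = search_key[:split_at].ljust(split_at, '\x00')
            buckets.setdefault(prefix, []).append(search_key)
        return buckets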
    def map(self, store, key, value):
    # (excerpt resumes inside InternalNode._iter_nodes)
        # prefix is the key in self._items to use, key_filter is the key_filter
        # entries that would match this node
        keys = {}
        shortcut = False
        if key_filter is None:
            # yielding all nodes, yield whatever we have, and queue up a read
            # for whatever we are missing
            shortcut = True
            for prefix, node in self._items.iteritems():
                if node.__class__ is tuple:
                    keys[node] = (prefix, None)
                else:
                    yield node, None
        elif len(key_filter) == 1:
            # Technically, this path could also be handled by the first check
            # in 'self._node_width' in length_filters. However, we can handle
            # this case without spending any time building up the
            # prefix_to_keys, etc state.

            # This is a bit ugly, but TIMEIT showed it to be by far the fastest
            # way to grab an arbitrary element from a set (see the timeit
            # sketch below):
            # 0.626us   list(key_filter)[0]
            #           is a func() for list(), 2 mallocs, and a getitem
            # 0.489us   [k for k in key_filter][0]
            #           still has the mallocs, avoids the func() call
            # 0.350us   iter(key_filter).next()
            #           has a func() call, and mallocs an iterator
            # 0.125us   for key in key_filter: pass
            #           no func() overhead, might malloc an iterator
            # 0.105us   for key in key_filter: break
            #           no func() overhead, might malloc an iterator, probably
            #           avoids checking an 'else' clause as part of the for
            for key in key_filter:
                break
            search_prefix = self._search_prefix_filter(key)
            if len(search_prefix) == self._node_width:
                # This item will match exactly, so just do a dict lookup, and
                # see what we can return
                shortcut = True
                try:
                    node = self._items[search_prefix]
                except KeyError:
                    # A given key can only match 1 child node; if it isn't
                    # there, then we can just return nothing
                    return
                if node.__class__ is tuple:
                    keys[node] = (search_prefix, [key])
                else:
                    # This is loaded, and the only thing that can match, so
                    # yield it and stop
                    yield node, [key]
                    return
        if not shortcut:
            # First, convert all keys into a list of search prefixes.
            # Aggregate common prefixes, and track the keys they come from.
            prefix_to_keys = {}
            length_filters = {}
            for key in key_filter:
                search_prefix = self._search_prefix_filter(key)
                length_filter = length_filters.setdefault(
                                    len(search_prefix), set())
                length_filter.add(search_prefix)
                prefix_to_keys.setdefault(search_prefix, []).append(key)

            if (self._node_width in length_filters
                and len(length_filters) == 1):
                # All of the search prefixes match exactly _node_width. This
                # means that everything is an exact match, and we can do a
                # lookup into self._items, rather than iterating over the items
                search_prefixes = length_filters[self._node_width]
                for search_prefix in search_prefixes:
                    try:
                        node = self._items[search_prefix]
                    except KeyError:
                        # We can ignore this one
                        continue
                    node_key_filter = prefix_to_keys[search_prefix]
                    if node.__class__ is tuple:
                        keys[node] = (search_prefix, node_key_filter)
                    else:
                        yield node, node_key_filter
            else:
                # The slow way. We walk every item in self._items, and check to
                # see if there are any matches
                length_filters = length_filters.items()
                for prefix, node in self._items.iteritems():
                    node_key_filter = []
                    for length, length_filter in length_filters:
                        sub_prefix = prefix[:length]
                        if sub_prefix in length_filter:
                            node_key_filter.extend(prefix_to_keys[sub_prefix])
                    if node_key_filter: # this key matched something, yield it
                        if node.__class__ is tuple:
                            keys[node] = (prefix, node_key_filter)
                        else:
                            yield node, node_key_filter
        if keys:
            # Look in the page cache for some more bytes
            found_keys = set()
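
    # --- Illustrative sketch, not part of bzrlib ---------------------------
    # Reproducing the TIMEIT comparison from the comment in _iter_nodes. With
    # number=1000000, timeit's total in seconds reads directly as microseconds
    # per iteration; absolute numbers vary by machine, only the ranking is
    # the point. Hypothetical helper.
    @staticmethod
    def _example_time_arbitrary_element():
        import timeit
        setup = "key_filter = set([('a',)])"
        for stmt in ("list(key_filter)[0]",
                     "[k for k in key_filter][0]",
                     "iter(key_filter).next()",
                     "for key in key_filter: pass",
                     "for key in key_filter: break"):
            print '%.3fus  %s' % (timeit.timeit(stmt, setup, number=1000000),
                                  stmt)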
    # (excerpt resumes inside _find_children_info)
                interesting_to_yield.append(record.key)
            if type(node) is InternalNode:
                if next_interesting_intersection is None:
                    next_interesting_intersection = set(node.refs())
                else:
                    next_interesting_intersection = \
                        next_interesting_intersection.intersection(node.refs())
                next_interesting.update(node.refs())
            else:
                interesting_items.update(node.iteritems(None))
    return (next_uninteresting, uninteresting_items,
            next_interesting, interesting_to_yield, interesting_items,
            next_interesting_intersection)
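
# --- Illustrative sketch, not part of bzrlib -------------------------------
# The running-intersection pattern used above, in isolation: seed with the
# first node's refs, then narrow to what every InternalNode shares. Returns
# None when no ref set was seen, matching the code above. Hypothetical helper.
def _example_common_refs(ref_sets):
    common = None
    for refs in ref_sets:
        if common is None:
            common = set(refs)
        else:
            common = common.intersection(refs)
    return common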
def _find_all_uninteresting(store, interesting_root_keys,
                            uninteresting_root_keys, pb):
    # ...
    # First, find the direct children of both the interesting and
    # uninteresting set
    (uninteresting_keys, uninteresting_items,
     interesting_keys, interesting_to_yield,
     interesting_items, interesting_intersection,
     ) = _find_children_info(store, interesting_root_keys,
                             uninteresting_root_keys, pb=pb)
    all_uninteresting_chks.update(uninteresting_keys)
    all_uninteresting_items.update(uninteresting_items)
    del uninteresting_items
    # Do not examine in detail pages common to all interesting trees.
    # Pages that are common to all interesting trees will have their
    # older versions found via the uninteresting tree traversal. Some pages
    # found via the interesting trees traversal will be uninteresting for
    # some of the other interesting trees, which is why we require a page to
    # be common to all of them before trimming it.
    if interesting_intersection is not None:
        uninteresting_keys.difference_update(interesting_intersection)

    # Second, find the full set of uninteresting bits reachable by the
    # uninteresting roots
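
    # --- Illustrative sketch, not part of bzrlib ---------------------------
    # The trimming step above in isolation: a page found via the interesting
    # traversal may still be uninteresting for some of the other interesting
    # trees, so it can only be dropped from the queue when every interesting
    # tree shares it. Hypothetical nested helper.
    def _example_trim_uninteresting(uninteresting_keys,
                                    interesting_intersection):
        if interesting_intersection is not None:
            uninteresting_keys.difference_update(interesting_intersection)
        return uninteresting_keys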