@@ -431,14 +431,20 @@
             efficient order for the index (keys iteration order in this case).
         """
         keys = set(keys)
+        local_keys = keys.intersection(self._keys)
         if self.reference_lists:
-            for key in keys.intersection(self._keys):
+            for key in local_keys:
                 node = self._nodes[key]
                 yield self, key, node[1], node[0]
         else:
-            for key in keys.intersection(self._keys):
+            for key in local_keys:
                 node = self._nodes[key]
                 yield self, key, node[1]
-        keys.difference_update(self._keys)
+        # Find things that are in backing indices that have not been handled
+        # yet.
+        if not self._backing_indices:
+            return # We won't find anything there either
+        # Remove all of the keys that we found locally
+        keys.difference_update(local_keys)
         for backing in self._backing_indices:
             if backing is None:
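This hunk hoists the repeated keys.intersection(self._keys) into local_keys, returns early when there are no backing indices to consult, and then subtracts only the locally-served keys from the request set instead of the index's whole key set. A minimal standalone sketch of that set arithmetic, using mock data rather than bzrlib objects:

# Minimal standalone sketch (not bzrlib code) of the set arithmetic the new
# version relies on: intersect once, reuse the result, then subtract only
# the keys that were actually served locally.
nodes = {('a',): 'value-a', ('b',): 'value-b'}   # stand-in for self._nodes
keys = set([('a',), ('c',)])                     # keys the caller asked for

local_keys = keys.intersection(nodes)            # computed once, used twice
for key in local_keys:
    print key, nodes[key]                        # served from this index
keys.difference_update(local_keys)               # only ('c',) remains for
print keys                                       # the backing indices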
@@ -879,16 +885,31 @@
                 "iter_all_entries scales with size of history.")
         if not self.key_count():
             return
+        if self._row_offsets[-1] == 1:
+            # There is only the root node, and we read that via key_count()
+            if self.node_ref_lists:
+                for key, (value, refs) in sorted(self._root_node.keys.items()):
+                    yield (self, key, value, refs)
+            else:
+                for key, (value, refs) in sorted(self._root_node.keys.items()):
+                    yield (self, key, value)
+            return
         start_of_leaves = self._row_offsets[-2]
         end_of_leaves = self._row_offsets[-1]
-        needed_nodes = range(start_of_leaves, end_of_leaves)
+        needed_offsets = range(start_of_leaves, end_of_leaves)
+        if needed_offsets == [0]:
+            # Special case when we only have a root node, as we have already
+            # handled that (we read it via key_count())
+            nodes = [(0, self._root_node)]
+        else:
+            nodes = self._read_nodes(needed_offsets)
         # We iterate strictly in-order so that we can use this function
         # for spilling index builds to disk.
         if self.node_ref_lists:
-            for _, node in self._read_nodes(needed_nodes):
+            for _, node in nodes:
                 for key, (value, refs) in sorted(node.keys.items()):
                     yield (self, key, value, refs)
         else:
-            for _, node in self._read_nodes(needed_nodes):
+            for _, node in nodes:
                 for key, (value, refs) in sorted(node.keys.items()):
                     yield (self, key, value)
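The new branch short-circuits iter_all_entries for an index that is nothing but a root node: _row_offsets[-1] == 1 means the tree holds a single node, which key_count() has already read and cached, so the entries can be yielded straight from _root_node instead of going back through _read_nodes. A toy illustration of the offset logic follows; it assumes _row_offsets stores cumulative per-row node offsets, which is my reading of the surrounding code rather than something shown in this hunk:

# Toy illustration (not bzrlib code), on the assumption that _row_offsets
# stores the cumulative node offset of each row in the B+tree, so its last
# entry is the total node count and the last two entries bracket the leaf row.
def leaf_offsets(row_offsets):
    if row_offsets[-1] == 1:
        # root-only index: the root doubles as the leaf and was already
        # read (and cached) by key_count()
        return []
    return range(row_offsets[-2], row_offsets[-1])

print leaf_offsets([0, 1])      # [] - nothing left to read from disk
print leaf_offsets([0, 1, 3])   # [1, 2] - root page 0, leaf pages 1 and 2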
@@ -1239,37 +1260,47 @@
         """Read some nodes from disk into the LRU cache.
 
         This performs a readv to get the node data into memory, and parses each
-        node, the yields it to the caller. The nodes are requested in the
+        node, then yields it to the caller. The nodes are requested in the
         supplied order. If possible doing sort() on the list before requesting
         a read may improve performance.
 
         :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
         :return: None
         """
+        # may be the byte string of the whole file
+        bytes = None
+        # list of (offset, length) regions of the file that should, eventually,
+        # be read into data_ranges, either from 'bytes' or from the transport
         ranges = []
         for index in nodes:
             offset = index * _PAGE_SIZE
             size = _PAGE_SIZE
             if index == 0:
                 # Root node - special case
                 if self._size:
                     size = min(_PAGE_SIZE, self._size)
                 else:
-                    stream = self._transport.get(self._name)
-                    start = stream.read(_PAGE_SIZE)
-                    # Avoid doing this again
-                    self._size = len(start)
-                    size = min(_PAGE_SIZE, self._size)
+                    # The only case where we don't know the size, is for very
+                    # small indexes. So we read the whole thing
+                    bytes = self._transport.get_bytes(self._name)
+                    self._size = len(bytes)
+                    # the whole thing should be parsed out of 'bytes'
+                    ranges.append((0, len(bytes)))
+                    break
             else:
                 if offset > self._size:
                     raise AssertionError('tried to read past the end'
                                          ' of the file %s > %s'
                                          % (offset, self._size))
                 size = min(size, self._size - offset)
             ranges.append((offset, size))
         if not ranges:
             return
-        if self._file is None:
+        elif bytes is not None:
+            # already have the whole file
+            data_ranges = [(start, bytes[start:start+_PAGE_SIZE])
+                           for start in xrange(0, len(bytes), _PAGE_SIZE)]
+        elif self._file is None:
             data_ranges = self._transport.readv(self._name, ranges)
         else:
             data_ranges = []
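The last hunk changes the unknown-size path in _read_nodes: instead of streaming just the first page via transport.get(), it fetches the whole file with get_bytes() (the size is only unknown for very small indexes), records the length, and later slices the in-memory string into _PAGE_SIZE chunks rather than issuing a readv over the transport. A standalone sketch of that slicing fallback; the 4096-byte page size is an assumption here, standing in for whatever _PAGE_SIZE is defined as elsewhere in the file:

# Standalone sketch (not bzrlib code) of the new fallback: once the whole
# file is in memory as a single string, cutting it into fixed-size pages
# replaces a transport readv() with plain string slicing. The 4096-byte
# page size is assumed, standing in for the module's real _PAGE_SIZE.
_PAGE_SIZE = 4096

def slice_pages(data, page_size=_PAGE_SIZE):
    return [(start, data[start:start + page_size])
            for start in xrange(0, len(data), page_size)]

pages = slice_pages('x' * 10000)
print [(offset, len(chunk)) for offset, chunk in pages]
# -> [(0, 4096), (4096, 4096), (8192, 1808)]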