27
29
from bisect import bisect_right
28
from cStringIO import StringIO
32
from bzrlib.lazy_import import lazy_import
32
from ..lazy_import import lazy_import
33
33
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
36
revision as _mod_revision,
43
from bzrlib.static_tuple import StaticTuple
44
from ..sixish import (
51
from ..static_tuple import StaticTuple
45
53
_HEADER_READV = (0, 200)
46
_OPTION_KEY_ELEMENTS = "key_elements="
48
_OPTION_NODE_REFS = "node_ref_lists="
49
_SIGNATURE = "Bazaar Graph Index 1\n"
52
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
53
_newline_null_re = re.compile('[\n\0]')
54
_OPTION_KEY_ELEMENTS = b"key_elements="
56
_OPTION_NODE_REFS = b"node_ref_lists="
57
_SIGNATURE = b"Bazaar Graph Index 1\n"
60
class BadIndexFormatSignature(errors.BzrError):
62
_fmt = "%(value)s is not an index of type %(_type)s."
64
def __init__(self, value, _type):
65
errors.BzrError.__init__(self)
70
class BadIndexData(errors.BzrError):
72
_fmt = "Error in data for index %(value)s."
74
def __init__(self, value):
75
errors.BzrError.__init__(self)
79
class BadIndexDuplicateKey(errors.BzrError):
81
_fmt = "The key '%(key)s' is already in index '%(index)s'."
83
def __init__(self, key, index):
84
errors.BzrError.__init__(self)
89
class BadIndexKey(errors.BzrError):
91
_fmt = "The key '%(key)s' is not a valid key."
93
def __init__(self, key):
94
errors.BzrError.__init__(self)
98
class BadIndexOptions(errors.BzrError):
100
_fmt = "Could not parse options for index %(value)s."
102
def __init__(self, value):
103
errors.BzrError.__init__(self)
107
class BadIndexValue(errors.BzrError):
109
_fmt = "The value '%(value)s' is not a valid value."
111
def __init__(self, value):
112
errors.BzrError.__init__(self)
116
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
117
_newline_null_re = re.compile(b'[\n\0]')
56
120
def _has_key_from_parent_map(self, key):
69
133
class GraphIndexBuilder(object):
70
134
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
136
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
138
_SIGNATURE OPTIONS NODES NEWLINE
139
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
140
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
142
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
143
KEY := Not-whitespace-utf8
145
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
146
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
147
REFERENCE := DIGITS ; digits is the byte offset in the index of the
149
VALUE := no-newline-no-null-bytes
88
152
def __init__(self, reference_lists=0, key_elements=1):
105
169
def _check_key(self, key):
106
170
"""Raise BadIndexKey if key is not a valid key for this index."""
107
171
if type(key) not in (tuple, StaticTuple):
108
raise errors.BadIndexKey(key)
172
raise BadIndexKey(key)
109
173
if self._key_length != len(key):
110
raise errors.BadIndexKey(key)
174
raise BadIndexKey(key)
111
175
for element in key:
112
if not element or _whitespace_re.search(element) is not None:
113
raise errors.BadIndexKey(element)
176
if not element or type(element) != bytes or _whitespace_re.search(element) is not None:
177
raise BadIndexKey(key)
115
179
def _external_references(self):
116
180
"""Return references that are not present in this index.
146
210
key_dict = key_dict.setdefault(subkey, {})
147
211
key_dict[key[-1]] = key, value, references
149
for key, (absent, references, value) in self._nodes.iteritems():
213
for key, (absent, references, value) in viewitems(self._nodes):
152
216
key_dict = nodes_by_key
184
248
:param value: The value associate with this key. Must not contain
185
249
newlines or null characters.
186
250
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
252
* node_refs: basically a packed form of 'references' where all
254
* absent_references: reference keys that are not in self._nodes.
255
This may contain duplicates if the same key is referenced in
193
258
as_st = StaticTuple.from_sequence
194
259
self._check_key(key)
195
260
if _newline_null_re.search(value) is not None:
196
raise errors.BadIndexValue(value)
261
raise BadIndexValue(value)
197
262
if len(references) != self.reference_lists:
198
raise errors.BadIndexValue(references)
263
raise BadIndexValue(references)
200
265
absent_references = []
201
266
for reference_list in references:
219
284
:param references: An iterable of iterables of keys. Each is a
220
285
reference to another key.
221
286
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
287
bytes as long as it does not contain \\0 or \\n.
225
290
absent_references) = self._check_key_ref_value(key, references, value)
226
if key in self._nodes and self._nodes[key][0] != 'a':
227
raise errors.BadIndexDuplicateKey(key, self)
291
if key in self._nodes and self._nodes[key][0] != b'a':
292
raise BadIndexDuplicateKey(key, self)
228
293
for reference in absent_references:
229
294
# There may be duplicates, but I don't think it is worth worrying
231
self._nodes[reference] = ('a', (), '')
296
self._nodes[reference] = (b'a', (), b'')
232
297
self._absent_keys.update(absent_references)
233
298
self._absent_keys.discard(key)
234
self._nodes[key] = ('', node_refs, value)
299
self._nodes[key] = (b'', node_refs, value)
235
300
if self._nodes_by_key is not None and self._key_length > 1:
236
301
self._update_nodes_by_key(key, value, node_refs)
245
310
def finish(self):
313
:returns: cBytesIO holding the full context of the index as it
314
should be written to disk.
246
316
lines = [_SIGNATURE]
247
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
317
lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
318
lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
249
319
key_count = len(self._nodes) - len(self._absent_keys)
250
lines.append(_OPTION_LEN + str(key_count) + '\n')
320
lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
251
321
prefix_length = sum(len(x) for x in lines)
252
322
# references are byte offsets. To avoid having to do nasty
253
323
# polynomial work to resolve offsets (references to later in the
264
334
# forward sorted by key. In future we may consider topological sorting,
265
335
# at the cost of table scans for direct lookup, or a second index for
267
nodes = sorted(self._nodes.items())
337
nodes = sorted(viewitems(self._nodes))
268
338
# if we do not prepass, we don't know how long it will be up front.
269
339
expected_bytes = None
270
340
# we only need to pre-pass if we have reference lists at all.
310
380
for key, non_ref_bytes, total_references in key_offset_info:
311
381
key_addresses[key] = non_ref_bytes + total_references*digits
313
format_string = '%%0%sd' % digits
383
format_string = b'%%0%dd' % digits
314
384
for key, (absent, references, value) in nodes:
315
385
flattened_references = []
316
386
for ref_list in references:
317
387
ref_addresses = []
318
388
for reference in ref_list:
319
389
ref_addresses.append(format_string % key_addresses[reference])
320
flattened_references.append('\r'.join(ref_addresses))
321
string_key = '\x00'.join(key)
322
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
323
'\t'.join(flattened_references), value))
325
result = StringIO(''.join(lines))
390
flattened_references.append(b'\r'.join(ref_addresses))
391
string_key = b'\x00'.join(key)
392
lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
393
b'\t'.join(flattened_references), value))
395
result = BytesIO(b''.join(lines))
326
396
if expected_bytes and len(result.getvalue()) != expected_bytes:
327
397
raise errors.BzrError('Failed index creation. Internal error:'
328
398
' mismatched output length and expected length: %d %d' %
385
455
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
456
"""Open an index called name on transport.
388
:param transport: A bzrlib.transport.Transport.
458
:param transport: A breezy.transport.Transport.
389
459
:param name: A path to provide to transport API calls.
390
460
:param size: The size of the index in bytes. This is used for bisection
391
461
logic to perform partial index reads. While the size could be
431
501
def __ne__(self, other):
432
502
return not self.__eq__(other)
504
def __lt__(self, other):
505
# We don't really care about the order, just that there is an order.
506
if (not isinstance(other, GraphIndex) and
507
not isinstance(other, InMemoryGraphIndex)):
508
raise TypeError(other)
509
return hash(self) < hash(other)
512
return hash((type(self), self._transport, self._name, self._size))
434
514
def __repr__(self):
435
515
return "%s(%r)" % (self.__class__.__name__,
436
516
self._transport.abspath(self._name))
444
524
# We already did this
446
526
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
527
trace.mutter('Reading entire index %s',
528
self._transport.abspath(self._name))
448
529
if stream is None:
449
530
stream = self._transport.get(self._name)
450
531
if self._base_offset != 0:
451
532
# This is wasteful, but it is better than dealing with
452
533
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
454
self._read_prefix(stream)
455
self._expected_elements = 3 + self._key_length
457
# raw data keyed by offset
458
self._keys_by_offset = {}
459
# ready-to-return key:value or key:value, node_ref_lists
461
self._nodes_by_key = None
464
lines = stream.read().split('\n')
534
stream = BytesIO(stream.read()[self._base_offset:])
536
self._read_prefix(stream)
537
self._expected_elements = 3 + self._key_length
539
# raw data keyed by offset
540
self._keys_by_offset = {}
541
# ready-to-return key:value or key:value, node_ref_lists
543
self._nodes_by_key = None
546
lines = stream.read().split(b'\n')
467
550
_, _, _, trailers = self._parse_lines(lines, pos)
468
for key, absent, references, value in self._keys_by_offset.itervalues():
551
for key, absent, references, value in viewvalues(self._keys_by_offset):
471
554
# resolve references:
496
579
% (ref_list_num, self.node_ref_lists))
498
581
nodes = self._nodes
499
for key, (value, ref_lists) in nodes.iteritems():
582
for key, (value, ref_lists) in viewitems(nodes):
500
583
ref_list = ref_lists[ref_list_num]
501
584
refs.update([ref for ref in ref_list if ref not in nodes])
505
588
if self._nodes_by_key is None:
506
589
nodes_by_key = {}
507
590
if self.node_ref_lists:
508
for key, (value, references) in self._nodes.iteritems():
591
for key, (value, references) in viewitems(self._nodes):
509
592
key_dict = nodes_by_key
510
593
for subkey in key[:-1]:
511
594
key_dict = key_dict.setdefault(subkey, {})
512
595
key_dict[key[-1]] = key, value, references
514
for key, value in self._nodes.iteritems():
597
for key, value in viewitems(self._nodes):
515
598
key_dict = nodes_by_key
516
599
for subkey in key[:-1]:
517
600
key_dict = key_dict.setdefault(subkey, {})
534
617
if self._nodes is None:
535
618
self._buffer_all()
536
619
if self.node_ref_lists:
537
for key, (value, node_ref_lists) in self._nodes.iteritems():
620
for key, (value, node_ref_lists) in viewitems(self._nodes):
538
621
yield self, key, value, node_ref_lists
540
for key, value in self._nodes.iteritems():
623
for key, value in viewitems(self._nodes):
541
624
yield self, key, value
543
626
def _read_prefix(self, stream):
544
627
signature = stream.read(len(self._signature()))
545
628
if not signature == self._signature():
546
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
629
raise BadIndexFormatSignature(self._name, GraphIndex)
547
630
options_line = stream.readline()
548
631
if not options_line.startswith(_OPTION_NODE_REFS):
549
raise errors.BadIndexOptions(self)
632
raise BadIndexOptions(self)
551
634
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
552
635
except ValueError:
553
raise errors.BadIndexOptions(self)
636
raise BadIndexOptions(self)
554
637
options_line = stream.readline()
555
638
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
556
raise errors.BadIndexOptions(self)
639
raise BadIndexOptions(self)
558
641
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
559
642
except ValueError:
560
raise errors.BadIndexOptions(self)
643
raise BadIndexOptions(self)
561
644
options_line = stream.readline()
562
645
if not options_line.startswith(_OPTION_LEN):
563
raise errors.BadIndexOptions(self)
646
raise BadIndexOptions(self)
565
648
self._key_count = int(options_line[len(_OPTION_LEN):-1])
566
649
except ValueError:
567
raise errors.BadIndexOptions(self)
650
raise BadIndexOptions(self)
569
652
def _resolve_references(self, references):
570
653
"""Return the resolved key references for references.
581
664
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
582
665
return tuple(node_refs)
584
def _find_index(self, range_map, key):
668
def _find_index(range_map, key):
585
669
"""Helper for the _parsed_*_index calls.
587
671
Given a range map - [(start, end), ...], finds the index of the range
670
754
if self._nodes is not None:
671
755
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
757
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
758
self._lookup_keys_via_location, self._size, keys))
676
760
def iter_entries_prefix(self, keys):
703
787
self._buffer_all()
704
788
if self._key_length == 1:
708
raise errors.BadIndexKey(key)
709
if len(key) != self._key_length:
710
raise errors.BadIndexKey(key)
790
_sanity_check_key(self, key)
711
791
if self.node_ref_lists:
712
792
value, node_refs = self._nodes[key]
713
793
yield self, key, value, node_refs
715
795
yield self, key, self._nodes[key]
717
797
nodes_by_key = self._get_nodes_by_key()
721
raise errors.BadIndexKey(key)
722
if len(key) != self._key_length:
723
raise errors.BadIndexKey(key)
724
# find what it refers to:
725
key_dict = nodes_by_key
727
# find the subdict whose contents should be returned.
729
while len(elements) and elements[0] is not None:
730
key_dict = key_dict[elements[0]]
733
# a non-existant lookup.
738
key_dict = dicts.pop(-1)
739
# can't be empty or would not exist
740
item, value = key_dict.iteritems().next()
741
if type(value) == dict:
743
dicts.extend(key_dict.itervalues())
746
for value in key_dict.itervalues():
747
# each value is the key:value:node refs tuple
749
yield (self, ) + value
751
# the last thing looked up was a terminal element
752
yield (self, ) + key_dict
798
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
754
801
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
755
802
"""See BTreeIndex._find_ancestors."""
929
976
signature = bytes[0:len(self._signature())]
930
977
if not signature == self._signature():
931
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
978
raise BadIndexFormatSignature(self._name, GraphIndex)
932
979
lines = bytes[len(self._signature()):].splitlines()
933
980
options_line = lines[0]
934
981
if not options_line.startswith(_OPTION_NODE_REFS):
935
raise errors.BadIndexOptions(self)
982
raise BadIndexOptions(self)
937
984
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
938
985
except ValueError:
939
raise errors.BadIndexOptions(self)
986
raise BadIndexOptions(self)
940
987
options_line = lines[1]
941
988
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
942
raise errors.BadIndexOptions(self)
989
raise BadIndexOptions(self)
944
991
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
945
992
except ValueError:
946
raise errors.BadIndexOptions(self)
993
raise BadIndexOptions(self)
947
994
options_line = lines[2]
948
995
if not options_line.startswith(_OPTION_LEN):
949
raise errors.BadIndexOptions(self)
996
raise BadIndexOptions(self)
951
998
self._key_count = int(options_line[len(_OPTION_LEN):])
952
999
except ValueError:
953
raise errors.BadIndexOptions(self)
1000
raise BadIndexOptions(self)
954
1001
# calculate the bytes we have processed
955
1002
header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
956
1003
len(lines[2]) + 3)
957
self._parsed_bytes(0, None, header_end, None)
1004
self._parsed_bytes(0, (), header_end, ())
958
1005
# setup parsing state
959
1006
self._expected_elements = 3 + self._key_length
960
1007
# raw data keyed by offset
1060
1107
if not start_adjacent:
1061
1108
# work around python bug in rfind
1062
1109
if trim_start is None:
1063
trim_start = data.find('\n') + 1
1110
trim_start = data.find(b'\n') + 1
1065
trim_start = data.find('\n', trim_start) + 1
1112
trim_start = data.find(b'\n', trim_start) + 1
1066
1113
if not (trim_start != 0):
1067
1114
raise AssertionError('no \n was present')
1068
1115
# print 'removing start', offset, trim_start, repr(data[:trim_start])
1069
1116
if not end_adjacent:
1070
1117
# work around python bug in rfind
1071
1118
if trim_end is None:
1072
trim_end = data.rfind('\n') + 1
1119
trim_end = data.rfind(b'\n') + 1
1074
trim_end = data.rfind('\n', None, trim_end) + 1
1121
trim_end = data.rfind(b'\n', None, trim_end) + 1
1075
1122
if not (trim_end != 0):
1076
1123
raise AssertionError('no \n was present')
1077
1124
# print 'removing end', offset, trim_end, repr(data[trim_end:])
1084
1131
offset += trim_start
1085
1132
# print "parsing", repr(trimmed_data)
1086
1133
# splitlines mangles the \r delimiters.. don't use it.
1087
lines = trimmed_data.split('\n')
1134
lines = trimmed_data.split(b'\n')
1090
1137
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
1102
1149
for line in lines:
1104
1151
# must be at the end
1106
1153
if not (self._size == pos + 1):
1107
1154
raise AssertionError("%s %s" % (self._size, pos))
1110
elements = line.split('\0')
1157
elements = line.split(b'\0')
1111
1158
if len(elements) != self._expected_elements:
1112
raise errors.BadIndexData(self)
1159
raise BadIndexData(self)
1113
1160
# keys are tuples. Each element is a string that may occur many
1114
1161
# times, so we intern them to save space. AB, RC, 200807
1115
key = tuple([intern(element) for element in elements[:self._key_length]])
1162
key = tuple([bytesintern(element) for element in elements[:self._key_length]])
1116
1163
if first_key is None:
1117
1164
first_key = key
1118
1165
absent, references, value = elements[-3:]
1120
for ref_string in references.split('\t'):
1167
for ref_string in references.split(b'\t'):
1121
1168
ref_lists.append(tuple([
1122
int(ref) for ref in ref_string.split('\r') if ref
1169
int(ref) for ref in ref_string.split(b'\r') if ref
1124
1171
ref_lists = tuple(ref_lists)
1125
1172
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
1217
1264
# We read the whole range, most likely because the
1218
1265
# Transport upcast our readv ranges into one long request
1219
1266
# for enough total data to grab the whole index.
1220
self._buffer_all(StringIO(data))
1267
self._buffer_all(BytesIO(data))
1222
1269
if self._bisect_nodes is None:
1223
1270
# this must be the start
1287
1334
def get_parent_map(self, keys):
1288
1335
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1336
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1337
if _mod_revision.NULL_REVISION in search_keys:
1338
search_keys.discard(_mod_revision.NULL_REVISION)
1339
found_parents = {_mod_revision.NULL_REVISION:[]}
1294
1341
found_parents = {}
1295
1342
for index, key, value, refs in self.iter_entries(search_keys):
1296
1343
parents = refs[0]
1297
1344
if not parents:
1298
parents = (NULL_REVISION,)
1345
parents = (_mod_revision.NULL_REVISION,)
1299
1346
found_parents[key] = parents
1300
1347
return found_parents
1302
has_key = _has_key_from_parent_map
1349
__contains__ = _has_key_from_parent_map
1304
1351
def insert_index(self, pos, index, name=None):
1305
1352
"""Insert a new index in the list of indices to query.
1434
1484
indices_info = zip(self._index_names, self._indices)
1435
1485
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1486
indices_info = list(indices_info)
1487
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1488
'promoting %r', indices_info, hit_indices)
1439
1490
unhit_names = []
1440
1491
new_hit_indices = []
1457
1508
self._indices = new_hit_indices + unhit_indices
1458
1509
self._index_names = hit_names + unhit_names
1459
1510
if 'index' in debug.debug_flags:
1460
mutter('CombinedGraphIndex reordered: %r', self._indices)
1511
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1461
1512
return hit_names
1463
1514
def _move_to_front_by_name(self, hit_names):
1550
1601
return sum((index.key_count() for index in self._indices), 0)
1551
except errors.NoSuchFile:
1552
self._reload_or_raise()
1602
except errors.NoSuchFile as e:
1603
if not self._try_reload(e):
1554
1606
missing_keys = _missing_keys_from_parent_map
1556
def _reload_or_raise(self):
1608
def _try_reload(self, error):
1557
1609
"""We just got a NoSuchFile exception.
1559
1611
Try to reload the indices, if it fails, just raise the current
1562
1614
if self._reload_func is None:
1564
exc_type, exc_value, exc_traceback = sys.exc_info()
1565
trace.mutter('Trying to reload after getting exception: %s',
1616
trace.mutter('Trying to reload after getting exception: %s', str(error))
1567
1617
if not self._reload_func():
1568
1618
# We tried to reload, but nothing changed, so we fail anyway
1569
1619
trace.mutter('_reload_func indicated nothing has changed.'
1570
1620
' Raising original exception.')
1571
raise exc_type, exc_value, exc_traceback
1573
1624
def set_sibling_indices(self, sibling_combined_graph_indices):
1574
1625
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1617
1669
trace.mutter_callsite(3,
1618
1670
"iter_all_entries scales with size of history.")
1619
1671
if self.reference_lists:
1620
for key, (absent, references, value) in self._nodes.iteritems():
1672
for key, (absent, references, value) in viewitems(self._nodes):
1622
1674
yield self, key, value, references
1624
for key, (absent, references, value) in self._nodes.iteritems():
1676
for key, (absent, references, value) in viewitems(self._nodes):
1626
1678
yield self, key, value
1665
1717
will be returned, and every match that is in the index will be
1668
# XXX: To much duplication with the GraphIndex class; consider finding
1669
# a good place to pull out the actual common logic.
1670
1720
keys = set(keys)
1673
1723
if self._key_length == 1:
1674
1724
for key in keys:
1677
raise errors.BadIndexKey(key)
1678
if len(key) != self._key_length:
1679
raise errors.BadIndexKey(key)
1725
_sanity_check_key(self, key)
1680
1726
node = self._nodes[key]
1686
1732
yield self, key, node[2]
1688
1734
nodes_by_key = self._get_nodes_by_key()
1692
raise errors.BadIndexKey(key)
1693
if len(key) != self._key_length:
1694
raise errors.BadIndexKey(key)
1695
# find what it refers to:
1696
key_dict = nodes_by_key
1697
elements = list(key)
1698
# find the subdict to return
1700
while len(elements) and elements[0] is not None:
1701
key_dict = key_dict[elements[0]]
1704
# a non-existant lookup.
1709
key_dict = dicts.pop(-1)
1710
# can't be empty or would not exist
1711
item, value = key_dict.iteritems().next()
1712
if type(value) == dict:
1714
dicts.extend(key_dict.itervalues())
1717
for value in key_dict.itervalues():
1718
yield (self, ) + value
1720
yield (self, ) + key_dict
1735
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
1722
1738
def key_count(self):
1723
1739
"""Return an estimate of the number of keys in this index.
1729
1745
def validate(self):
1730
1746
"""In memory index's have no known corruption at the moment."""
1748
def __lt__(self, other):
1749
# We don't really care about the order, just that there is an order.
1750
if (not isinstance(other, GraphIndex) and
1751
not isinstance(other, InMemoryGraphIndex)):
1752
raise TypeError(other)
1753
return hash(self) < hash(other)
1733
1756
class GraphIndexPrefixAdapter(object):
1734
1757
"""An adapter between GraphIndex with different key lengths.
1792
1815
for node in an_iter:
1794
1817
if node[1][:self.prefix_len] != self.prefix:
1795
raise errors.BadIndexData(self)
1818
raise BadIndexData(self)
1796
1819
for ref_list in node[3]:
1797
1820
for ref_node in ref_list:
1798
1821
if ref_node[:self.prefix_len] != self.prefix:
1799
raise errors.BadIndexData(self)
1822
raise BadIndexData(self)
1800
1823
yield node[0], node[1][self.prefix_len:], node[2], (
1801
1824
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
1802
1825
for ref_list in node[3]))
1855
1878
def validate(self):
1856
1879
"""Call the adapted's validate."""
1857
1880
self.adapted.validate()
1883
def _sanity_check_key(index_or_builder, key):
1884
"""Raise BadIndexKey if key cannot be used for prefix matching."""
1886
raise BadIndexKey(key)
1887
if len(key) != index_or_builder._key_length:
1888
raise BadIndexKey(key)
1891
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
1892
"""Helper for implementing prefix matching iterators."""
1894
_sanity_check_key(index_or_builder, key)
1895
# find what it refers to:
1896
key_dict = nodes_by_key
1897
elements = list(key)
1898
# find the subdict whose contents should be returned.
1900
while len(elements) and elements[0] is not None:
1901
key_dict = key_dict[elements[0]]
1904
# a non-existant lookup.
1909
values_view = viewvalues(dicts.pop())
1910
# can't be empty or would not exist
1911
value = next(iter(values_view))
1912
if isinstance(value, dict):
1913
# still descending, push values
1914
dicts.extend(values_view)
1916
# at leaf tuples, yield values
1917
for value in values_view:
1918
# each value is the key:value:node refs tuple
1920
yield (index_or_builder, ) + value
1922
# the last thing looked up was a terminal element
1923
yield (index_or_builder, ) + key_dict