27
29
from bisect import bisect_right
28
from cStringIO import StringIO
32
from bzrlib.lazy_import import lazy_import
32
from ..lazy_import import lazy_import
33
33
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
36
revision as _mod_revision,
43
from bzrlib.static_tuple import StaticTuple
44
from ..sixish import (
51
from ..static_tuple import StaticTuple
45
53
_HEADER_READV = (0, 200)
46
_OPTION_KEY_ELEMENTS = "key_elements="
48
_OPTION_NODE_REFS = "node_ref_lists="
49
_SIGNATURE = "Bazaar Graph Index 1\n"
52
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
53
_newline_null_re = re.compile('[\n\0]')
54
_OPTION_KEY_ELEMENTS = b"key_elements="
56
_OPTION_NODE_REFS = b"node_ref_lists="
57
_SIGNATURE = b"Bazaar Graph Index 1\n"
60
class BadIndexFormatSignature(errors.BzrError):
62
_fmt = "%(value)s is not an index of type %(_type)s."
64
def __init__(self, value, _type):
65
errors.BzrError.__init__(self)
70
class BadIndexData(errors.BzrError):
72
_fmt = "Error in data for index %(value)s."
74
def __init__(self, value):
75
errors.BzrError.__init__(self)
79
class BadIndexDuplicateKey(errors.BzrError):
81
_fmt = "The key '%(key)s' is already in index '%(index)s'."
83
def __init__(self, key, index):
84
errors.BzrError.__init__(self)
89
class BadIndexKey(errors.BzrError):
91
_fmt = "The key '%(key)s' is not a valid key."
93
def __init__(self, key):
94
errors.BzrError.__init__(self)
98
class BadIndexOptions(errors.BzrError):
100
_fmt = "Could not parse options for index %(value)s."
102
def __init__(self, value):
103
errors.BzrError.__init__(self)
107
class BadIndexValue(errors.BzrError):
109
_fmt = "The value '%(value)s' is not a valid value."
111
def __init__(self, value):
112
errors.BzrError.__init__(self)
116
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
117
_newline_null_re = re.compile(b'[\n\0]')
56
120
def _has_key_from_parent_map(self, key):
69
133
class GraphIndexBuilder(object):
70
134
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
136
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
138
_SIGNATURE OPTIONS NODES NEWLINE
139
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
140
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
142
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
143
KEY := Not-whitespace-utf8
145
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
146
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
147
REFERENCE := DIGITS ; digits is the byte offset in the index of the
149
VALUE := no-newline-no-null-bytes
88
152
def __init__(self, reference_lists=0, key_elements=1):
105
169
def _check_key(self, key):
106
170
"""Raise BadIndexKey if key is not a valid key for this index."""
107
171
if type(key) not in (tuple, StaticTuple):
108
raise errors.BadIndexKey(key)
172
raise BadIndexKey(key)
109
173
if self._key_length != len(key):
110
raise errors.BadIndexKey(key)
174
raise BadIndexKey(key)
111
175
for element in key:
112
if not element or _whitespace_re.search(element) is not None:
113
raise errors.BadIndexKey(element)
176
if not element or type(element) != bytes or _whitespace_re.search(element) is not None:
177
raise BadIndexKey(key)
115
179
def _external_references(self):
116
180
"""Return references that are not present in this index.
146
210
key_dict = key_dict.setdefault(subkey, {})
147
211
key_dict[key[-1]] = key, value, references
149
for key, (absent, references, value) in self._nodes.iteritems():
213
for key, (absent, references, value) in viewitems(self._nodes):
152
216
key_dict = nodes_by_key
184
248
:param value: The value associate with this key. Must not contain
185
249
newlines or null characters.
186
250
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
252
* node_refs: basically a packed form of 'references' where all
254
* absent_references: reference keys that are not in self._nodes.
255
This may contain duplicates if the same key is referenced in
193
258
as_st = StaticTuple.from_sequence
194
259
self._check_key(key)
195
260
if _newline_null_re.search(value) is not None:
196
raise errors.BadIndexValue(value)
261
raise BadIndexValue(value)
197
262
if len(references) != self.reference_lists:
198
raise errors.BadIndexValue(references)
263
raise BadIndexValue(references)
200
265
absent_references = []
201
266
for reference_list in references:
219
284
:param references: An iterable of iterables of keys. Each is a
220
285
reference to another key.
221
286
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
287
bytes as long as it does not contain \\0 or \\n.
225
290
absent_references) = self._check_key_ref_value(key, references, value)
226
if key in self._nodes and self._nodes[key][0] != 'a':
227
raise errors.BadIndexDuplicateKey(key, self)
291
if key in self._nodes and self._nodes[key][0] != b'a':
292
raise BadIndexDuplicateKey(key, self)
228
293
for reference in absent_references:
229
294
# There may be duplicates, but I don't think it is worth worrying
231
self._nodes[reference] = ('a', (), '')
296
self._nodes[reference] = (b'a', (), b'')
232
297
self._absent_keys.update(absent_references)
233
298
self._absent_keys.discard(key)
234
self._nodes[key] = ('', node_refs, value)
299
self._nodes[key] = (b'', node_refs, value)
235
300
if self._nodes_by_key is not None and self._key_length > 1:
236
301
self._update_nodes_by_key(key, value, node_refs)
245
310
def finish(self):
313
:returns: cBytesIO holding the full context of the index as it
314
should be written to disk.
246
316
lines = [_SIGNATURE]
247
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
317
lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
318
lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
249
319
key_count = len(self._nodes) - len(self._absent_keys)
250
lines.append(_OPTION_LEN + str(key_count) + '\n')
320
lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
251
321
prefix_length = sum(len(x) for x in lines)
252
322
# references are byte offsets. To avoid having to do nasty
253
323
# polynomial work to resolve offsets (references to later in the
264
334
# forward sorted by key. In future we may consider topological sorting,
265
335
# at the cost of table scans for direct lookup, or a second index for
267
nodes = sorted(self._nodes.items())
337
nodes = sorted(viewitems(self._nodes))
268
338
# if we do not prepass, we don't know how long it will be up front.
269
339
expected_bytes = None
270
340
# we only need to pre-pass if we have reference lists at all.
310
380
for key, non_ref_bytes, total_references in key_offset_info:
311
381
key_addresses[key] = non_ref_bytes + total_references*digits
313
format_string = '%%0%sd' % digits
383
format_string = b'%%0%dd' % digits
314
384
for key, (absent, references, value) in nodes:
315
385
flattened_references = []
316
386
for ref_list in references:
317
387
ref_addresses = []
318
388
for reference in ref_list:
319
389
ref_addresses.append(format_string % key_addresses[reference])
320
flattened_references.append('\r'.join(ref_addresses))
321
string_key = '\x00'.join(key)
322
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
323
'\t'.join(flattened_references), value))
325
result = StringIO(''.join(lines))
390
flattened_references.append(b'\r'.join(ref_addresses))
391
string_key = b'\x00'.join(key)
392
lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
393
b'\t'.join(flattened_references), value))
395
result = BytesIO(b''.join(lines))
326
396
if expected_bytes and len(result.getvalue()) != expected_bytes:
327
397
raise errors.BzrError('Failed index creation. Internal error:'
328
398
' mismatched output length and expected length: %d %d' %
385
455
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
456
"""Open an index called name on transport.
388
:param transport: A bzrlib.transport.Transport.
458
:param transport: A breezy.transport.Transport.
389
459
:param name: A path to provide to transport API calls.
390
460
:param size: The size of the index in bytes. This is used for bisection
391
461
logic to perform partial index reads. While the size could be
431
501
def __ne__(self, other):
432
502
return not self.__eq__(other)
504
def __lt__(self, other):
505
# We don't really care about the order, just that there is an order.
506
if (not isinstance(other, GraphIndex) and
507
not isinstance(other, InMemoryGraphIndex)):
508
raise TypeError(other)
509
return hash(self) < hash(other)
512
return hash((type(self), self._transport, self._name, self._size))
434
514
def __repr__(self):
435
515
return "%s(%r)" % (self.__class__.__name__,
436
516
self._transport.abspath(self._name))
444
524
# We already did this
446
526
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
527
trace.mutter('Reading entire index %s',
528
self._transport.abspath(self._name))
448
529
if stream is None:
449
530
stream = self._transport.get(self._name)
450
531
if self._base_offset != 0:
451
532
# This is wasteful, but it is better than dealing with
452
533
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
454
self._read_prefix(stream)
455
self._expected_elements = 3 + self._key_length
457
# raw data keyed by offset
458
self._keys_by_offset = {}
459
# ready-to-return key:value or key:value, node_ref_lists
461
self._nodes_by_key = None
464
lines = stream.read().split('\n')
534
stream = BytesIO(stream.read()[self._base_offset:])
536
self._read_prefix(stream)
537
self._expected_elements = 3 + self._key_length
539
# raw data keyed by offset
540
self._keys_by_offset = {}
541
# ready-to-return key:value or key:value, node_ref_lists
543
self._nodes_by_key = None
546
lines = stream.read().split(b'\n')
467
550
_, _, _, trailers = self._parse_lines(lines, pos)
468
for key, absent, references, value in self._keys_by_offset.itervalues():
551
for key, absent, references, value in viewvalues(self._keys_by_offset):
471
554
# resolve references:
496
579
% (ref_list_num, self.node_ref_lists))
498
581
nodes = self._nodes
499
for key, (value, ref_lists) in nodes.iteritems():
582
for key, (value, ref_lists) in viewitems(nodes):
500
583
ref_list = ref_lists[ref_list_num]
501
584
refs.update([ref for ref in ref_list if ref not in nodes])
505
588
if self._nodes_by_key is None:
506
589
nodes_by_key = {}
507
590
if self.node_ref_lists:
508
for key, (value, references) in self._nodes.iteritems():
591
for key, (value, references) in viewitems(self._nodes):
509
592
key_dict = nodes_by_key
510
593
for subkey in key[:-1]:
511
594
key_dict = key_dict.setdefault(subkey, {})
512
595
key_dict[key[-1]] = key, value, references
514
for key, value in self._nodes.iteritems():
597
for key, value in viewitems(self._nodes):
515
598
key_dict = nodes_by_key
516
599
for subkey in key[:-1]:
517
600
key_dict = key_dict.setdefault(subkey, {})
534
617
if self._nodes is None:
535
618
self._buffer_all()
536
619
if self.node_ref_lists:
537
for key, (value, node_ref_lists) in self._nodes.iteritems():
620
for key, (value, node_ref_lists) in viewitems(self._nodes):
538
621
yield self, key, value, node_ref_lists
540
for key, value in self._nodes.iteritems():
623
for key, value in viewitems(self._nodes):
541
624
yield self, key, value
543
626
def _read_prefix(self, stream):
544
627
signature = stream.read(len(self._signature()))
545
628
if not signature == self._signature():
546
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
629
raise BadIndexFormatSignature(self._name, GraphIndex)
547
630
options_line = stream.readline()
548
631
if not options_line.startswith(_OPTION_NODE_REFS):
549
raise errors.BadIndexOptions(self)
632
raise BadIndexOptions(self)
551
634
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
552
635
except ValueError:
553
raise errors.BadIndexOptions(self)
636
raise BadIndexOptions(self)
554
637
options_line = stream.readline()
555
638
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
556
raise errors.BadIndexOptions(self)
639
raise BadIndexOptions(self)
558
641
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
559
642
except ValueError:
560
raise errors.BadIndexOptions(self)
643
raise BadIndexOptions(self)
561
644
options_line = stream.readline()
562
645
if not options_line.startswith(_OPTION_LEN):
563
raise errors.BadIndexOptions(self)
646
raise BadIndexOptions(self)
565
648
self._key_count = int(options_line[len(_OPTION_LEN):-1])
566
649
except ValueError:
567
raise errors.BadIndexOptions(self)
650
raise BadIndexOptions(self)
569
652
def _resolve_references(self, references):
570
653
"""Return the resolved key references for references.
670
753
if self._nodes is not None:
671
754
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
756
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
757
self._lookup_keys_via_location, self._size, keys))
676
759
def iter_entries_prefix(self, keys):
703
786
self._buffer_all()
704
787
if self._key_length == 1:
708
raise errors.BadIndexKey(key)
709
if len(key) != self._key_length:
710
raise errors.BadIndexKey(key)
789
_sanity_check_key(self, key)
711
790
if self.node_ref_lists:
712
791
value, node_refs = self._nodes[key]
713
792
yield self, key, value, node_refs
715
794
yield self, key, self._nodes[key]
717
796
nodes_by_key = self._get_nodes_by_key()
721
raise errors.BadIndexKey(key)
722
if len(key) != self._key_length:
723
raise errors.BadIndexKey(key)
724
# find what it refers to:
725
key_dict = nodes_by_key
727
# find the subdict whose contents should be returned.
729
while len(elements) and elements[0] is not None:
730
key_dict = key_dict[elements[0]]
733
# a non-existant lookup.
738
key_dict = dicts.pop(-1)
739
# can't be empty or would not exist
740
item, value = key_dict.iteritems().next()
741
if type(value) == dict:
743
dicts.extend(key_dict.itervalues())
746
for value in key_dict.itervalues():
747
# each value is the key:value:node refs tuple
749
yield (self, ) + value
751
# the last thing looked up was a terminal element
752
yield (self, ) + key_dict
797
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
754
800
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
755
801
"""See BTreeIndex._find_ancestors."""
929
975
signature = bytes[0:len(self._signature())]
930
976
if not signature == self._signature():
931
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
977
raise BadIndexFormatSignature(self._name, GraphIndex)
932
978
lines = bytes[len(self._signature()):].splitlines()
933
979
options_line = lines[0]
934
980
if not options_line.startswith(_OPTION_NODE_REFS):
935
raise errors.BadIndexOptions(self)
981
raise BadIndexOptions(self)
937
983
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
938
984
except ValueError:
939
raise errors.BadIndexOptions(self)
985
raise BadIndexOptions(self)
940
986
options_line = lines[1]
941
987
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
942
raise errors.BadIndexOptions(self)
988
raise BadIndexOptions(self)
944
990
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
945
991
except ValueError:
946
raise errors.BadIndexOptions(self)
992
raise BadIndexOptions(self)
947
993
options_line = lines[2]
948
994
if not options_line.startswith(_OPTION_LEN):
949
raise errors.BadIndexOptions(self)
995
raise BadIndexOptions(self)
951
997
self._key_count = int(options_line[len(_OPTION_LEN):])
952
998
except ValueError:
953
raise errors.BadIndexOptions(self)
999
raise BadIndexOptions(self)
954
1000
# calculate the bytes we have processed
955
1001
header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
956
1002
len(lines[2]) + 3)
1060
1106
if not start_adjacent:
1061
1107
# work around python bug in rfind
1062
1108
if trim_start is None:
1063
trim_start = data.find('\n') + 1
1109
trim_start = data.find(b'\n') + 1
1065
trim_start = data.find('\n', trim_start) + 1
1111
trim_start = data.find(b'\n', trim_start) + 1
1066
1112
if not (trim_start != 0):
1067
1113
raise AssertionError('no \n was present')
1068
1114
# print 'removing start', offset, trim_start, repr(data[:trim_start])
1069
1115
if not end_adjacent:
1070
1116
# work around python bug in rfind
1071
1117
if trim_end is None:
1072
trim_end = data.rfind('\n') + 1
1118
trim_end = data.rfind(b'\n') + 1
1074
trim_end = data.rfind('\n', None, trim_end) + 1
1120
trim_end = data.rfind(b'\n', None, trim_end) + 1
1075
1121
if not (trim_end != 0):
1076
1122
raise AssertionError('no \n was present')
1077
1123
# print 'removing end', offset, trim_end, repr(data[trim_end:])
1084
1130
offset += trim_start
1085
1131
# print "parsing", repr(trimmed_data)
1086
1132
# splitlines mangles the \r delimiters.. don't use it.
1087
lines = trimmed_data.split('\n')
1133
lines = trimmed_data.split(b'\n')
1090
1136
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
1102
1148
for line in lines:
1104
1150
# must be at the end
1106
1152
if not (self._size == pos + 1):
1107
1153
raise AssertionError("%s %s" % (self._size, pos))
1110
elements = line.split('\0')
1156
elements = line.split(b'\0')
1111
1157
if len(elements) != self._expected_elements:
1112
raise errors.BadIndexData(self)
1158
raise BadIndexData(self)
1113
1159
# keys are tuples. Each element is a string that may occur many
1114
1160
# times, so we intern them to save space. AB, RC, 200807
1115
key = tuple([intern(element) for element in elements[:self._key_length]])
1161
key = tuple([bytesintern(element) for element in elements[:self._key_length]])
1116
1162
if first_key is None:
1117
1163
first_key = key
1118
1164
absent, references, value = elements[-3:]
1120
for ref_string in references.split('\t'):
1166
for ref_string in references.split(b'\t'):
1121
1167
ref_lists.append(tuple([
1122
int(ref) for ref in ref_string.split('\r') if ref
1168
int(ref) for ref in ref_string.split(b'\r') if ref
1124
1170
ref_lists = tuple(ref_lists)
1125
1171
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
1217
1263
# We read the whole range, most likely because the
1218
1264
# Transport upcast our readv ranges into one long request
1219
1265
# for enough total data to grab the whole index.
1220
self._buffer_all(StringIO(data))
1266
self._buffer_all(BytesIO(data))
1222
1268
if self._bisect_nodes is None:
1223
1269
# this must be the start
1287
1333
def get_parent_map(self, keys):
1288
1334
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1335
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1336
if _mod_revision.NULL_REVISION in search_keys:
1337
search_keys.discard(_mod_revision.NULL_REVISION)
1338
found_parents = {_mod_revision.NULL_REVISION:[]}
1294
1340
found_parents = {}
1295
1341
for index, key, value, refs in self.iter_entries(search_keys):
1296
1342
parents = refs[0]
1297
1343
if not parents:
1298
parents = (NULL_REVISION,)
1344
parents = (_mod_revision.NULL_REVISION,)
1299
1345
found_parents[key] = parents
1300
1346
return found_parents
1302
has_key = _has_key_from_parent_map
1348
__contains__ = _has_key_from_parent_map
1304
1350
def insert_index(self, pos, index, name=None):
1305
1351
"""Insert a new index in the list of indices to query.
1434
1483
indices_info = zip(self._index_names, self._indices)
1435
1484
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1485
indices_info = list(indices_info)
1486
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1487
'promoting %r', indices_info, hit_indices)
1439
1489
unhit_names = []
1440
1490
new_hit_indices = []
1457
1507
self._indices = new_hit_indices + unhit_indices
1458
1508
self._index_names = hit_names + unhit_names
1459
1509
if 'index' in debug.debug_flags:
1460
mutter('CombinedGraphIndex reordered: %r', self._indices)
1510
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1461
1511
return hit_names
1463
1513
def _move_to_front_by_name(self, hit_names):
1550
1600
return sum((index.key_count() for index in self._indices), 0)
1551
except errors.NoSuchFile:
1552
self._reload_or_raise()
1601
except errors.NoSuchFile as e:
1602
if not self._try_reload(e):
1554
1605
missing_keys = _missing_keys_from_parent_map
1556
def _reload_or_raise(self):
1607
def _try_reload(self, error):
1557
1608
"""We just got a NoSuchFile exception.
1559
1610
Try to reload the indices, if it fails, just raise the current
1562
1613
if self._reload_func is None:
1564
exc_type, exc_value, exc_traceback = sys.exc_info()
1565
trace.mutter('Trying to reload after getting exception: %s',
1615
trace.mutter('Trying to reload after getting exception: %s', str(error))
1567
1616
if not self._reload_func():
1568
1617
# We tried to reload, but nothing changed, so we fail anyway
1569
1618
trace.mutter('_reload_func indicated nothing has changed.'
1570
1619
' Raising original exception.')
1571
raise exc_type, exc_value, exc_traceback
1573
1623
def set_sibling_indices(self, sibling_combined_graph_indices):
1574
1624
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1617
1668
trace.mutter_callsite(3,
1618
1669
"iter_all_entries scales with size of history.")
1619
1670
if self.reference_lists:
1620
for key, (absent, references, value) in self._nodes.iteritems():
1671
for key, (absent, references, value) in viewitems(self._nodes):
1622
1673
yield self, key, value, references
1624
for key, (absent, references, value) in self._nodes.iteritems():
1675
for key, (absent, references, value) in viewitems(self._nodes):
1626
1677
yield self, key, value
1665
1716
will be returned, and every match that is in the index will be
1668
# XXX: To much duplication with the GraphIndex class; consider finding
1669
# a good place to pull out the actual common logic.
1670
1719
keys = set(keys)
1673
1722
if self._key_length == 1:
1674
1723
for key in keys:
1677
raise errors.BadIndexKey(key)
1678
if len(key) != self._key_length:
1679
raise errors.BadIndexKey(key)
1724
_sanity_check_key(self, key)
1680
1725
node = self._nodes[key]
1686
1731
yield self, key, node[2]
1688
1733
nodes_by_key = self._get_nodes_by_key()
1692
raise errors.BadIndexKey(key)
1693
if len(key) != self._key_length:
1694
raise errors.BadIndexKey(key)
1695
# find what it refers to:
1696
key_dict = nodes_by_key
1697
elements = list(key)
1698
# find the subdict to return
1700
while len(elements) and elements[0] is not None:
1701
key_dict = key_dict[elements[0]]
1704
# a non-existant lookup.
1709
key_dict = dicts.pop(-1)
1710
# can't be empty or would not exist
1711
item, value = key_dict.iteritems().next()
1712
if type(value) == dict:
1714
dicts.extend(key_dict.itervalues())
1717
for value in key_dict.itervalues():
1718
yield (self, ) + value
1720
yield (self, ) + key_dict
1734
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
1722
1737
def key_count(self):
1723
1738
"""Return an estimate of the number of keys in this index.
1729
1744
def validate(self):
1730
1745
"""In memory index's have no known corruption at the moment."""
1747
def __lt__(self, other):
1748
# We don't really care about the order, just that there is an order.
1749
if (not isinstance(other, GraphIndex) and
1750
not isinstance(other, InMemoryGraphIndex)):
1751
raise TypeError(other)
1752
return hash(self) < hash(other)
1733
1755
class GraphIndexPrefixAdapter(object):
1734
1756
"""An adapter between GraphIndex with different key lengths.
1792
1814
for node in an_iter:
1794
1816
if node[1][:self.prefix_len] != self.prefix:
1795
raise errors.BadIndexData(self)
1817
raise BadIndexData(self)
1796
1818
for ref_list in node[3]:
1797
1819
for ref_node in ref_list:
1798
1820
if ref_node[:self.prefix_len] != self.prefix:
1799
raise errors.BadIndexData(self)
1821
raise BadIndexData(self)
1800
1822
yield node[0], node[1][self.prefix_len:], node[2], (
1801
1823
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
1802
1824
for ref_list in node[3]))
1855
1877
def validate(self):
1856
1878
"""Call the adapted's validate."""
1857
1879
self.adapted.validate()
1882
def _sanity_check_key(index_or_builder, key):
1883
"""Raise BadIndexKey if key cannot be used for prefix matching."""
1885
raise BadIndexKey(key)
1886
if len(key) != index_or_builder._key_length:
1887
raise BadIndexKey(key)
1890
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
1891
"""Helper for implementing prefix matching iterators."""
1893
_sanity_check_key(index_or_builder, key)
1894
# find what it refers to:
1895
key_dict = nodes_by_key
1896
elements = list(key)
1897
# find the subdict whose contents should be returned.
1899
while len(elements) and elements[0] is not None:
1900
key_dict = key_dict[elements[0]]
1903
# a non-existant lookup.
1908
values_view = viewvalues(dicts.pop())
1909
# can't be empty or would not exist
1910
value = next(iter(values_view))
1911
if isinstance(value, dict):
1912
# still descending, push values
1913
dicts.extend(values_view)
1915
# at leaf tuples, yield values
1916
for value in values_view:
1917
# each value is the key:value:node refs tuple
1919
yield (index_or_builder, ) + value
1921
# the last thing looked up was a terminal element
1922
yield (index_or_builder, ) + key_dict