1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Indexing facilities."""
23
'GraphIndexPrefixAdapter',
27
from cStringIO import StringIO
30
from bzrlib.lazy_import import lazy_import
31
lazy_import(globals(), """
32
from bzrlib import trace
33
from bzrlib.trace import mutter
35
from bzrlib import debug, errors
37
_OPTION_KEY_ELEMENTS = "key_elements="
39
_OPTION_NODE_REFS = "node_ref_lists="
40
_SIGNATURE = "Bazaar Graph Index 1\n"
43
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
44
_newline_null_re = re.compile('[\n\0]')
47
class GraphIndexBuilder(object):
48
"""A builder that can build a GraphIndex.
50
The resulting graph has the structure:
52
_SIGNATURE OPTIONS NODES NEWLINE
53
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
54
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
56
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
57
KEY := Not-whitespace-utf8
59
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
60
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
61
REFERENCE := DIGITS ; digits is the byte offset in the index of the
63
VALUE := no-newline-no-null-bytes
66
def __init__(self, reference_lists=0, key_elements=1):
67
"""Create a GraphIndex builder.
69
:param reference_lists: The number of node references lists for each
71
:param key_elements: The number of bytestrings in each key.
73
self.reference_lists = reference_lists
76
self._nodes_by_key = {}
77
self._key_length = key_elements
79
def _check_key(self, key):
80
"""Raise BadIndexKey if key is not a valid key for this index."""
81
if type(key) != tuple:
82
raise errors.BadIndexKey(key)
83
if self._key_length != len(key):
84
raise errors.BadIndexKey(key)
86
if not element or _whitespace_re.search(element) is not None:
87
raise errors.BadIndexKey(element)
89
def add_node(self, key, value, references=()):
90
"""Add a node to the index.
92
:param key: The key. keys are non-empty tuples containing
93
as many whitespace-free utf8 bytestrings as the key length
94
defined for this index.
95
:param references: An iterable of iterables of keys. Each is a
96
reference to another key.
97
:param value: The value to associate with the key. It may be any
98
bytes as long as it does not contain \0 or \n.
101
if _newline_null_re.search(value) is not None:
102
raise errors.BadIndexValue(value)
103
if len(references) != self.reference_lists:
104
raise errors.BadIndexValue(references)
106
for reference_list in references:
107
for reference in reference_list:
108
self._check_key(reference)
109
if reference not in self._nodes:
110
self._nodes[reference] = ('a', (), '')
111
node_refs.append(tuple(reference_list))
112
if key in self._nodes and self._nodes[key][0] == '':
113
raise errors.BadIndexDuplicateKey(key, self)
114
self._nodes[key] = ('', tuple(node_refs), value)
116
if self._key_length > 1:
117
key_dict = self._nodes_by_key
118
if self.reference_lists:
119
key_value = key, value, tuple(node_refs)
121
key_value = key, value
122
# possibly should do this on-demand, but it seems likely it is
124
# For a key of (foo, bar, baz) create
125
# _nodes_by_key[foo][bar][baz] = key_value
126
for subkey in key[:-1]:
127
key_dict = key_dict.setdefault(subkey, {})
128
key_dict[key[-1]] = key_value
132
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
133
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
134
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
135
prefix_length = sum(len(x) for x in lines)
136
# references are byte offsets. To avoid having to do nasty
137
# polynomial work to resolve offsets (references to later in the
138
# file cannot be determined until all the inbetween references have
139
# been calculated too) we pad the offsets with 0's to make them be
140
# of consistent length. Using binary offsets would break the trivial
142
# to calculate the width of zero's needed we do three passes:
143
# one to gather all the non-reference data and the number of references.
144
# one to pad all the data with reference-length and determine entry
148
# forward sorted by key. In future we may consider topological sorting,
149
# at the cost of table scans for direct lookup, or a second index for
151
nodes = sorted(self._nodes.items())
152
# if we do not prepass, we don't know how long it will be up front.
153
expected_bytes = None
154
# we only need to pre-pass if we have reference lists at all.
155
if self.reference_lists:
157
non_ref_bytes = prefix_length
159
# TODO use simple multiplication for the constants in this loop.
160
for key, (absent, references, value) in nodes:
161
# record the offset known *so far* for this key:
162
# the non reference bytes to date, and the total references to
163
# date - saves reaccumulating on the second pass
164
key_offset_info.append((key, non_ref_bytes, total_references))
165
# key is literal, value is literal, there are 3 null's, 1 NL
166
# key is variable length tuple, \x00 between elements
167
non_ref_bytes += sum(len(element) for element in key)
168
if self._key_length > 1:
169
non_ref_bytes += self._key_length - 1
170
# value is literal bytes, there are 3 null's, 1 NL.
171
non_ref_bytes += len(value) + 3 + 1
172
# one byte for absent if set.
175
elif self.reference_lists:
176
# (ref_lists -1) tabs
177
non_ref_bytes += self.reference_lists - 1
178
# (ref-1 cr's per ref_list)
179
for ref_list in references:
180
# how many references across the whole file?
181
total_references += len(ref_list)
182
# accrue reference separators
184
non_ref_bytes += len(ref_list) - 1
185
# how many digits are needed to represent the total byte count?
187
possible_total_bytes = non_ref_bytes + total_references*digits
188
while 10 ** digits < possible_total_bytes:
190
possible_total_bytes = non_ref_bytes + total_references*digits
191
expected_bytes = possible_total_bytes + 1 # terminating newline
192
# resolve key addresses.
194
for key, non_ref_bytes, total_references in key_offset_info:
195
key_addresses[key] = non_ref_bytes + total_references*digits
197
format_string = '%%0%sd' % digits
198
for key, (absent, references, value) in nodes:
199
flattened_references = []
200
for ref_list in references:
202
for reference in ref_list:
203
ref_addresses.append(format_string % key_addresses[reference])
204
flattened_references.append('\r'.join(ref_addresses))
205
string_key = '\x00'.join(key)
206
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
207
'\t'.join(flattened_references), value))
209
result = StringIO(''.join(lines))
210
if expected_bytes and len(result.getvalue()) != expected_bytes:
211
raise errors.BzrError('Failed index creation. Internal error:'
212
' mismatched output length and expected length: %d %d' %
213
(len(result.getvalue()), expected_bytes))
214
return StringIO(''.join(lines))
217
class GraphIndex(object):
218
"""An index for data with embedded graphs.
220
The index maps keys to a list of key reference lists, and a value.
221
Each node has the same number of key reference lists. Each key reference
222
list can be empty or an arbitrary length. The value is an opaque NULL
223
terminated string without any newlines. The storage of the index is
224
hidden in the interface: keys and key references are always tuples of
225
bytestrings, never the internal representation (e.g. dictionary offsets).
227
It is presumed that the index will not be mutated - it is static data.
229
Successive iter_all_entries calls will read the entire index each time.
230
Additionally, iter_entries calls will read the index linearly until the
231
desired keys are found. XXX: This must be fixed before the index is
232
suitable for production use. :XXX
235
def __init__(self, transport, name, size):
236
"""Open an index called name on transport.
238
:param transport: A bzrlib.transport.Transport.
239
:param name: A path to provide to transport API calls.
240
:param size: The size of the index in bytes. This is used for bisection
241
logic to perform partial index reads. While the size could be
242
obtained by statting the file this introduced an additional round
243
trip that is avoided by having it supplied.
245
self._transport = transport
248
self._key_count = None
249
self._keys_by_offset = None
250
self._nodes_by_key = None
253
def _buffer_all(self):
254
"""Buffer all the index data.
256
Mutates self._nodes and self.keys_by_offset.
258
if 'index' in debug.debug_flags:
259
mutter('Reading entire index %s', self._transport.abspath(self._name))
260
stream = self._transport.get(self._name)
261
self._read_prefix(stream)
262
expected_elements = 3 + self._key_length
264
# raw data keyed by offset
265
self._keys_by_offset = {}
266
# ready-to-return key:value or key:value, node_ref_lists
268
self._nodes_by_key = {}
271
for line in stream.readlines():
275
elements = line.split('\0')
276
if len(elements) != expected_elements:
277
raise errors.BadIndexData(self)
279
key = tuple(elements[:self._key_length])
280
absent, references, value = elements[-3:]
281
value = value[:-1] # remove the newline
283
for ref_string in references.split('\t'):
284
ref_lists.append(tuple([
285
int(ref) for ref in ref_string.split('\r') if ref
287
ref_lists = tuple(ref_lists)
288
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
290
for key, absent, references, value in self._keys_by_offset.itervalues():
293
# resolve references:
294
if self.node_ref_lists:
296
for ref_list in references:
297
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
298
node_value = (value, tuple(node_refs))
301
self._nodes[key] = node_value
302
if self._key_length > 1:
303
subkey = list(reversed(key[:-1]))
304
key_dict = self._nodes_by_key
305
if self.node_ref_lists:
306
key_value = key, node_value[0], node_value[1]
308
key_value = key, node_value
309
# possibly should do this on-demand, but it seems likely it is
311
# For a key of (foo, bar, baz) create
312
# _nodes_by_key[foo][bar][baz] = key_value
313
for subkey in key[:-1]:
314
key_dict = key_dict.setdefault(subkey, {})
315
key_dict[key[-1]] = key_value
316
# cache the keys for quick set intersections
317
self._keys = set(self._nodes)
319
# there must be one line - the empty trailer line.
320
raise errors.BadIndexData(self)
322
def iter_all_entries(self):
323
"""Iterate over all keys within the index.
325
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
326
The former tuple is used when there are no reference lists in the
327
index, making the API compatible with simple key:value index types.
328
There is no defined order for the result iteration - it will be in
329
the most efficient order for the index.
331
if 'evil' in debug.debug_flags:
332
trace.mutter_callsite(3,
333
"iter_all_entries scales with size of history.")
334
if self._nodes is None:
336
if self.node_ref_lists:
337
for key, (value, node_ref_lists) in self._nodes.iteritems():
338
yield self, key, value, node_ref_lists
340
for key, value in self._nodes.iteritems():
341
yield self, key, value
343
def _read_prefix(self, stream):
344
signature = stream.read(len(self._signature()))
345
if not signature == self._signature():
346
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
347
options_line = stream.readline()
348
if not options_line.startswith(_OPTION_NODE_REFS):
349
raise errors.BadIndexOptions(self)
351
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
353
raise errors.BadIndexOptions(self)
354
options_line = stream.readline()
355
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
356
raise errors.BadIndexOptions(self)
358
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
360
raise errors.BadIndexOptions(self)
361
options_line = stream.readline()
362
if not options_line.startswith(_OPTION_LEN):
363
raise errors.BadIndexOptions(self)
365
self._key_count = int(options_line[len(_OPTION_LEN):-1])
367
raise errors.BadIndexOptions(self)
369
def iter_entries(self, keys):
370
"""Iterate over keys within the index.
372
:param keys: An iterable providing the keys to be retrieved.
373
:return: An iterable as per iter_all_entries, but restricted to the
374
keys supplied. No additional keys will be returned, and every
375
key supplied that is in the index will be returned.
380
if self._nodes is None:
382
keys = keys.intersection(self._keys)
383
if self.node_ref_lists:
385
value, node_refs = self._nodes[key]
386
yield self, key, value, node_refs
389
yield self, key, self._nodes[key]
391
def iter_entries_prefix(self, keys):
392
"""Iterate over keys within the index using prefix matching.
394
Prefix matching is applied within the tuple of a key, not to within
395
the bytestring of each key element. e.g. if you have the keys ('foo',
396
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
397
only the former key is returned.
399
:param keys: An iterable providing the key prefixes to be retrieved.
400
Each key prefix takes the form of a tuple the length of a key, but
401
with the last N elements 'None' rather than a regular bytestring.
402
The first element cannot be 'None'.
403
:return: An iterable as per iter_all_entries, but restricted to the
404
keys with a matching prefix to those supplied. No additional keys
405
will be returned, and every match that is in the index will be
411
# load data - also finds key lengths
412
if self._nodes is None:
414
if self._key_length == 1:
418
raise errors.BadIndexKey(key)
419
if len(key) != self._key_length:
420
raise errors.BadIndexKey(key)
421
if self.node_ref_lists:
422
value, node_refs = self._nodes[key]
423
yield self, key, value, node_refs
425
yield self, key, self._nodes[key]
430
raise errors.BadIndexKey(key)
431
if len(key) != self._key_length:
432
raise errors.BadIndexKey(key)
433
# find what it refers to:
434
key_dict = self._nodes_by_key
436
# find the subdict whose contents should be returned.
438
while len(elements) and elements[0] is not None:
439
key_dict = key_dict[elements[0]]
442
# a non-existant lookup.
447
key_dict = dicts.pop(-1)
448
# can't be empty or would not exist
449
item, value = key_dict.iteritems().next()
450
if type(value) == dict:
452
dicts.extend(key_dict.itervalues())
455
for value in key_dict.itervalues():
456
# each value is the key:value:node refs tuple
458
yield (self, ) + value
460
# the last thing looked up was a terminal element
461
yield (self, ) + key_dict
464
"""Return an estimate of the number of keys in this index.
466
For GraphIndex the estimate is exact.
468
if self._key_count is None:
469
# really this should just read the prefix
471
return self._key_count
473
def _signature(self):
474
"""The file signature for this index type."""
478
"""Validate that everything in the index can be accessed."""
479
# iter_all validates completely at the moment, so just do that.
480
for node in self.iter_all_entries():
484
class CombinedGraphIndex(object):
485
"""A GraphIndex made up from smaller GraphIndices.
487
The backing indices must implement GraphIndex, and are presumed to be
490
Queries against the combined index will be made against the first index,
491
and then the second and so on. The order of index's can thus influence
492
performance significantly. For example, if one index is on local disk and a
493
second on a remote server, the local disk index should be before the other
497
def __init__(self, indices):
498
"""Create a CombinedGraphIndex backed by indices.
500
:param indices: An ordered list of indices to query for data.
502
self._indices = indices
506
self.__class__.__name__,
507
', '.join(map(repr, self._indices)))
509
def insert_index(self, pos, index):
510
"""Insert a new index in the list of indices to query.
512
:param pos: The position to insert the index.
513
:param index: The index to insert.
515
self._indices.insert(pos, index)
517
def iter_all_entries(self):
518
"""Iterate over all keys within the index
520
Duplicate keys across child indices are presumed to have the same
521
value and are only reported once.
523
:return: An iterable of (index, key, reference_lists, value).
524
There is no defined order for the result iteration - it will be in
525
the most efficient order for the index.
528
for index in self._indices:
529
for node in index.iter_all_entries():
530
if node[1] not in seen_keys:
532
seen_keys.add(node[1])
534
def iter_entries(self, keys):
535
"""Iterate over keys within the index.
537
Duplicate keys across child indices are presumed to have the same
538
value and are only reported once.
540
:param keys: An iterable providing the keys to be retrieved.
541
:return: An iterable of (index, key, reference_lists, value). There is no
542
defined order for the result iteration - it will be in the most
543
efficient order for the index.
546
for index in self._indices:
549
for node in index.iter_entries(keys):
553
def iter_entries_prefix(self, keys):
554
"""Iterate over keys within the index using prefix matching.
556
Duplicate keys across child indices are presumed to have the same
557
value and are only reported once.
559
Prefix matching is applied within the tuple of a key, not to within
560
the bytestring of each key element. e.g. if you have the keys ('foo',
561
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
562
only the former key is returned.
564
:param keys: An iterable providing the key prefixes to be retrieved.
565
Each key prefix takes the form of a tuple the length of a key, but
566
with the last N elements 'None' rather than a regular bytestring.
567
The first element cannot be 'None'.
568
:return: An iterable as per iter_all_entries, but restricted to the
569
keys with a matching prefix to those supplied. No additional keys
570
will be returned, and every match that is in the index will be
577
for index in self._indices:
578
for node in index.iter_entries_prefix(keys):
579
if node[1] in seen_keys:
581
seen_keys.add(node[1])
585
"""Return an estimate of the number of keys in this index.
587
For CombinedGraphIndex this is approximated by the sum of the keys of
588
the child indices. As child indices may have duplicate keys this can
589
have a maximum error of the number of child indices * largest number of
592
return sum((index.key_count() for index in self._indices), 0)
595
"""Validate that everything in the index can be accessed."""
596
for index in self._indices:
600
class InMemoryGraphIndex(GraphIndexBuilder):
601
"""A GraphIndex which operates entirely out of memory and is mutable.
603
This is designed to allow the accumulation of GraphIndex entries during a
604
single write operation, where the accumulated entries need to be immediately
605
available - for example via a CombinedGraphIndex.
608
def add_nodes(self, nodes):
609
"""Add nodes to the index.
611
:param nodes: An iterable of (key, node_refs, value) entries to add.
613
if self.reference_lists:
614
for (key, value, node_refs) in nodes:
615
self.add_node(key, value, node_refs)
617
for (key, value) in nodes:
618
self.add_node(key, value)
620
def iter_all_entries(self):
621
"""Iterate over all keys within the index
623
:return: An iterable of (index, key, reference_lists, value). There is no
624
defined order for the result iteration - it will be in the most
625
efficient order for the index (in this case dictionary hash order).
627
if 'evil' in debug.debug_flags:
628
trace.mutter_callsite(3,
629
"iter_all_entries scales with size of history.")
630
if self.reference_lists:
631
for key, (absent, references, value) in self._nodes.iteritems():
633
yield self, key, value, references
635
for key, (absent, references, value) in self._nodes.iteritems():
637
yield self, key, value
639
def iter_entries(self, keys):
640
"""Iterate over keys within the index.
642
:param keys: An iterable providing the keys to be retrieved.
643
:return: An iterable of (index, key, reference_lists, value). There is no
644
defined order for the result iteration - it will be in the most
645
efficient order for the index (keys iteration order in this case).
648
if self.reference_lists:
649
for key in keys.intersection(self._keys):
650
node = self._nodes[key]
652
yield self, key, node[2], node[1]
654
for key in keys.intersection(self._keys):
655
node = self._nodes[key]
657
yield self, key, node[2]
659
def iter_entries_prefix(self, keys):
660
"""Iterate over keys within the index using prefix matching.
662
Prefix matching is applied within the tuple of a key, not to within
663
the bytestring of each key element. e.g. if you have the keys ('foo',
664
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
665
only the former key is returned.
667
:param keys: An iterable providing the key prefixes to be retrieved.
668
Each key prefix takes the form of a tuple the length of a key, but
669
with the last N elements 'None' rather than a regular bytestring.
670
The first element cannot be 'None'.
671
:return: An iterable as per iter_all_entries, but restricted to the
672
keys with a matching prefix to those supplied. No additional keys
673
will be returned, and every match that is in the index will be
676
# XXX: To much duplication with the GraphIndex class; consider finding
677
# a good place to pull out the actual common logic.
681
if self._key_length == 1:
685
raise errors.BadIndexKey(key)
686
if len(key) != self._key_length:
687
raise errors.BadIndexKey(key)
688
node = self._nodes[key]
691
if self.reference_lists:
692
yield self, key, node[2], node[1]
694
yield self, key, node[2]
699
raise errors.BadIndexKey(key)
700
if len(key) != self._key_length:
701
raise errors.BadIndexKey(key)
702
# find what it refers to:
703
key_dict = self._nodes_by_key
705
# find the subdict to return
707
while len(elements) and elements[0] is not None:
708
key_dict = key_dict[elements[0]]
711
# a non-existant lookup.
716
key_dict = dicts.pop(-1)
717
# can't be empty or would not exist
718
item, value = key_dict.iteritems().next()
719
if type(value) == dict:
721
dicts.extend(key_dict.itervalues())
724
for value in key_dict.itervalues():
725
yield (self, ) + value
727
yield (self, ) + key_dict
730
"""Return an estimate of the number of keys in this index.
732
For InMemoryGraphIndex the estimate is exact.
734
return len(self._keys)
737
"""In memory index's have no known corruption at the moment."""
740
class GraphIndexPrefixAdapter(object):
741
"""An adapter between GraphIndex with different key lengths.
743
Queries against this will emit queries against the adapted Graph with the
744
prefix added, queries for all items use iter_entries_prefix. The returned
745
nodes will have their keys and node references adjusted to remove the
746
prefix. Finally, an add_nodes_callback can be supplied - when called the
747
nodes and references being added will have prefix prepended.
750
def __init__(self, adapted, prefix, missing_key_length,
751
add_nodes_callback=None):
752
"""Construct an adapter against adapted with prefix."""
753
self.adapted = adapted
754
self.prefix_key = prefix + (None,)*missing_key_length
756
self.prefix_len = len(prefix)
757
self.add_nodes_callback = add_nodes_callback
759
def add_nodes(self, nodes):
760
"""Add nodes to the index.
762
:param nodes: An iterable of (key, node_refs, value) entries to add.
764
# save nodes in case its an iterator
766
translated_nodes = []
768
# Add prefix_key to each reference node_refs is a tuple of tuples,
769
# so split it apart, and add prefix_key to the internal reference
770
for (key, value, node_refs) in nodes:
771
adjusted_references = (
772
tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
773
for ref_list in node_refs))
774
translated_nodes.append((self.prefix + key, value,
775
adjusted_references))
777
# XXX: TODO add an explicit interface for getting the reference list
778
# status, to handle this bit of user-friendliness in the API more
780
for (key, value) in nodes:
781
translated_nodes.append((self.prefix + key, value))
782
self.add_nodes_callback(translated_nodes)
784
def add_node(self, key, value, references=()):
785
"""Add a node to the index.
787
:param key: The key. keys are non-empty tuples containing
788
as many whitespace-free utf8 bytestrings as the key length
789
defined for this index.
790
:param references: An iterable of iterables of keys. Each is a
791
reference to another key.
792
:param value: The value to associate with the key. It may be any
793
bytes as long as it does not contain \0 or \n.
795
self.add_nodes(((key, value, references), ))
797
def _strip_prefix(self, an_iter):
798
"""Strip prefix data from nodes and return it."""
801
if node[1][:self.prefix_len] != self.prefix:
802
raise errors.BadIndexData(self)
803
for ref_list in node[3]:
804
for ref_node in ref_list:
805
if ref_node[:self.prefix_len] != self.prefix:
806
raise errors.BadIndexData(self)
807
yield node[0], node[1][self.prefix_len:], node[2], (
808
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
809
for ref_list in node[3]))
811
def iter_all_entries(self):
812
"""Iterate over all keys within the index
814
iter_all_entries is implemented against the adapted index using
817
:return: An iterable of (index, key, reference_lists, value). There is no
818
defined order for the result iteration - it will be in the most
819
efficient order for the index (in this case dictionary hash order).
821
return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))
823
def iter_entries(self, keys):
824
"""Iterate over keys within the index.
826
:param keys: An iterable providing the keys to be retrieved.
827
:return: An iterable of (key, reference_lists, value). There is no
828
defined order for the result iteration - it will be in the most
829
efficient order for the index (keys iteration order in this case).
831
return self._strip_prefix(self.adapted.iter_entries(
832
self.prefix + key for key in keys))
834
def iter_entries_prefix(self, keys):
835
"""Iterate over keys within the index using prefix matching.
837
Prefix matching is applied within the tuple of a key, not to within
838
the bytestring of each key element. e.g. if you have the keys ('foo',
839
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
840
only the former key is returned.
842
:param keys: An iterable providing the key prefixes to be retrieved.
843
Each key prefix takes the form of a tuple the length of a key, but
844
with the last N elements 'None' rather than a regular bytestring.
845
The first element cannot be 'None'.
846
:return: An iterable as per iter_all_entries, but restricted to the
847
keys with a matching prefix to those supplied. No additional keys
848
will be returned, and every match that is in the index will be
851
return self._strip_prefix(self.adapted.iter_entries_prefix(
852
self.prefix + key for key in keys))
855
"""Return an estimate of the number of keys in this index.
857
For GraphIndexPrefixAdapter this is relatively expensive - key
858
iteration with the prefix is done.
860
return len(list(self.iter_all_entries()))
863
"""Call the adapted's validate."""
864
self.adapted.validate()