1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Indexing facilities."""
23
'GraphIndexPrefixAdapter',
27
from cStringIO import StringIO
30
from bzrlib.lazy_import import lazy_import
31
lazy_import(globals(), """
32
from bzrlib import trace
33
from bzrlib.trace import mutter
35
from bzrlib import debug, errors
37
_OPTION_KEY_ELEMENTS = "key_elements="
39
_OPTION_NODE_REFS = "node_ref_lists="
40
_SIGNATURE = "Bazaar Graph Index 1\n"
43
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
44
_newline_null_re = re.compile('[\n\0]')
47
class GraphIndexBuilder(object):
48
"""A builder that can build a GraphIndex.
50
The resulting graph has the structure:
52
_SIGNATURE OPTIONS NODES NEWLINE
53
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
54
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
56
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
57
KEY := Not-whitespace-utf8
59
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
60
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
61
REFERENCE := DIGITS ; digits is the byte offset in the index of the
63
VALUE := no-newline-no-null-bytes
66
def __init__(self, reference_lists=0, key_elements=1):
67
"""Create a GraphIndex builder.
69
:param reference_lists: The number of node references lists for each
71
:param key_elements: The number of bytestrings in each key.
73
self.reference_lists = reference_lists
76
self._nodes_by_key = {}
77
self._key_length = key_elements
79
def _check_key(self, key):
80
"""Raise BadIndexKey if key is not a valid key for this index."""
81
if type(key) != tuple:
82
raise errors.BadIndexKey(key)
83
if self._key_length != len(key):
84
raise errors.BadIndexKey(key)
86
if not element or _whitespace_re.search(element) is not None:
87
raise errors.BadIndexKey(element)
89
def add_node(self, key, value, references=()):
90
"""Add a node to the index.
92
:param key: The key. keys are non-empty tuples containing
93
as many whitespace-free utf8 bytestrings as the key length
94
defined for this index.
95
:param references: An iterable of iterables of keys. Each is a
96
reference to another key.
97
:param value: The value to associate with the key. It may be any
98
bytes as long as it does not contain \0 or \n.
101
if _newline_null_re.search(value) is not None:
102
raise errors.BadIndexValue(value)
103
if len(references) != self.reference_lists:
104
raise errors.BadIndexValue(references)
106
for reference_list in references:
107
for reference in reference_list:
108
self._check_key(reference)
109
if reference not in self._nodes:
110
self._nodes[reference] = ('a', (), '')
111
node_refs.append(tuple(reference_list))
112
if key in self._nodes and self._nodes[key][0] == '':
113
raise errors.BadIndexDuplicateKey(key, self)
114
self._nodes[key] = ('', tuple(node_refs), value)
116
if self._key_length > 1:
117
key_dict = self._nodes_by_key
118
if self.reference_lists:
119
key_value = key, value, tuple(node_refs)
121
key_value = key, value
122
# possibly should do this on-demand, but it seems likely it is
124
# For a key of (foo, bar, baz) create
125
# _nodes_by_key[foo][bar][baz] = key_value
126
for subkey in key[:-1]:
127
key_dict = key_dict.setdefault(subkey, {})
128
key_dict[key[-1]] = key_value
132
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
133
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
134
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
135
prefix_length = sum(len(x) for x in lines)
136
# references are byte offsets. To avoid having to do nasty
137
# polynomial work to resolve offsets (references to later in the
138
# file cannot be determined until all the inbetween references have
139
# been calculated too) we pad the offsets with 0's to make them be
140
# of consistent length. Using binary offsets would break the trivial
142
# to calculate the width of zero's needed we do three passes:
143
# one to gather all the non-reference data and the number of references.
144
# one to pad all the data with reference-length and determine entry
148
# forward sorted by key. In future we may consider topological sorting,
149
# at the cost of table scans for direct lookup, or a second index for
151
nodes = sorted(self._nodes.items())
152
# if we do not prepass, we don't know how long it will be up front.
153
expected_bytes = None
154
# we only need to pre-pass if we have reference lists at all.
155
if self.reference_lists:
157
non_ref_bytes = prefix_length
159
# TODO use simple multiplication for the constants in this loop.
160
for key, (absent, references, value) in nodes:
161
# record the offset known *so far* for this key:
162
# the non reference bytes to date, and the total references to
163
# date - saves reaccumulating on the second pass
164
key_offset_info.append((key, non_ref_bytes, total_references))
165
# key is literal, value is literal, there are 3 null's, 1 NL
166
# key is variable length tuple, \x00 between elements
167
non_ref_bytes += sum(len(element) for element in key)
168
if self._key_length > 1:
169
non_ref_bytes += self._key_length - 1
170
# value is literal bytes, there are 3 null's, 1 NL.
171
non_ref_bytes += len(value) + 3 + 1
172
# one byte for absent if set.
175
elif self.reference_lists:
176
# (ref_lists -1) tabs
177
non_ref_bytes += self.reference_lists - 1
178
# (ref-1 cr's per ref_list)
179
for ref_list in references:
180
# how many references across the whole file?
181
total_references += len(ref_list)
182
# accrue reference separators
184
non_ref_bytes += len(ref_list) - 1
185
# how many digits are needed to represent the total byte count?
187
possible_total_bytes = non_ref_bytes + total_references*digits
188
while 10 ** digits < possible_total_bytes:
190
possible_total_bytes = non_ref_bytes + total_references*digits
191
expected_bytes = possible_total_bytes + 1 # terminating newline
192
# resolve key addresses.
194
for key, non_ref_bytes, total_references in key_offset_info:
195
key_addresses[key] = non_ref_bytes + total_references*digits
197
format_string = '%%0%sd' % digits
198
for key, (absent, references, value) in nodes:
199
flattened_references = []
200
for ref_list in references:
202
for reference in ref_list:
203
ref_addresses.append(format_string % key_addresses[reference])
204
flattened_references.append('\r'.join(ref_addresses))
205
string_key = '\x00'.join(key)
206
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
207
'\t'.join(flattened_references), value))
209
result = StringIO(''.join(lines))
210
if expected_bytes and len(result.getvalue()) != expected_bytes:
211
raise errors.BzrError('Failed index creation. Internal error:'
212
' mismatched output length and expected length: %d %d' %
213
(len(result.getvalue()), expected_bytes))
214
return StringIO(''.join(lines))
217
class GraphIndex(object):
218
"""An index for data with embedded graphs.
220
The index maps keys to a list of key reference lists, and a value.
221
Each node has the same number of key reference lists. Each key reference
222
list can be empty or an arbitrary length. The value is an opaque NULL
223
terminated string without any newlines. The storage of the index is
224
hidden in the interface: keys and key references are always tuples of
225
bytestrings, never the internal representation (e.g. dictionary offsets).
227
It is presumed that the index will not be mutated - it is static data.
229
Successive iter_all_entries calls will read the entire index each time.
230
Additionally, iter_entries calls will read the index linearly until the
231
desired keys are found. XXX: This must be fixed before the index is
232
suitable for production use. :XXX
235
def __init__(self, transport, name, size):
236
"""Open an index called name on transport.
238
:param transport: A bzrlib.transport.Transport.
239
:param name: A path to provide to transport API calls.
240
:param size: The size of the index in bytes. This is used for bisection
241
logic to perform partial index reads. While the size could be
242
obtained by statting the file this introduced an additional round
243
trip that is avoided by having it supplied.
245
self._transport = transport
248
self._parsed_byte_map = []
249
self._key_count = None
250
self._keys_by_offset = None
251
self._nodes_by_key = None
254
def _buffer_all(self):
255
"""Buffer all the index data.
257
Mutates self._nodes and self.keys_by_offset.
259
if 'index' in debug.debug_flags:
260
mutter('Reading entire index %s', self._transport.abspath(self._name))
261
stream = self._transport.get(self._name)
262
self._read_prefix(stream)
263
expected_elements = 3 + self._key_length
265
# raw data keyed by offset
266
self._keys_by_offset = {}
267
# ready-to-return key:value or key:value, node_ref_lists
269
self._nodes_by_key = {}
272
for line in stream.readlines():
276
elements = line.split('\0')
277
if len(elements) != expected_elements:
278
raise errors.BadIndexData(self)
280
key = tuple(elements[:self._key_length])
281
absent, references, value = elements[-3:]
282
value = value[:-1] # remove the newline
284
for ref_string in references.split('\t'):
285
ref_lists.append(tuple([
286
int(ref) for ref in ref_string.split('\r') if ref
288
ref_lists = tuple(ref_lists)
289
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
291
for key, absent, references, value in self._keys_by_offset.itervalues():
294
# resolve references:
295
if self.node_ref_lists:
297
for ref_list in references:
298
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
299
node_value = (value, tuple(node_refs))
302
self._nodes[key] = node_value
303
if self._key_length > 1:
304
subkey = list(reversed(key[:-1]))
305
key_dict = self._nodes_by_key
306
if self.node_ref_lists:
307
key_value = key, node_value[0], node_value[1]
309
key_value = key, node_value
310
# possibly should do this on-demand, but it seems likely it is
312
# For a key of (foo, bar, baz) create
313
# _nodes_by_key[foo][bar][baz] = key_value
314
for subkey in key[:-1]:
315
key_dict = key_dict.setdefault(subkey, {})
316
key_dict[key[-1]] = key_value
317
# cache the keys for quick set intersections
318
self._keys = set(self._nodes)
320
# there must be one line - the empty trailer line.
321
raise errors.BadIndexData(self)
323
def iter_all_entries(self):
324
"""Iterate over all keys within the index.
326
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
327
The former tuple is used when there are no reference lists in the
328
index, making the API compatible with simple key:value index types.
329
There is no defined order for the result iteration - it will be in
330
the most efficient order for the index.
332
if 'evil' in debug.debug_flags:
333
trace.mutter_callsite(3,
334
"iter_all_entries scales with size of history.")
335
if self._nodes is None:
337
if self.node_ref_lists:
338
for key, (value, node_ref_lists) in self._nodes.iteritems():
339
yield self, key, value, node_ref_lists
341
for key, value in self._nodes.iteritems():
342
yield self, key, value
344
def _read_prefix(self, stream):
345
signature = stream.read(len(self._signature()))
346
if not signature == self._signature():
347
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
348
options_line = stream.readline()
349
if not options_line.startswith(_OPTION_NODE_REFS):
350
raise errors.BadIndexOptions(self)
352
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
354
raise errors.BadIndexOptions(self)
355
options_line = stream.readline()
356
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
357
raise errors.BadIndexOptions(self)
359
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
361
raise errors.BadIndexOptions(self)
362
options_line = stream.readline()
363
if not options_line.startswith(_OPTION_LEN):
364
raise errors.BadIndexOptions(self)
366
self._key_count = int(options_line[len(_OPTION_LEN):-1])
368
raise errors.BadIndexOptions(self)
370
def iter_entries(self, keys):
371
"""Iterate over keys within the index.
373
:param keys: An iterable providing the keys to be retrieved.
374
:return: An iterable as per iter_all_entries, but restricted to the
375
keys supplied. No additional keys will be returned, and every
376
key supplied that is in the index will be returned.
381
if self._nodes is None:
383
keys = keys.intersection(self._keys)
384
if self.node_ref_lists:
386
value, node_refs = self._nodes[key]
387
yield self, key, value, node_refs
390
yield self, key, self._nodes[key]
392
def iter_entries_prefix(self, keys):
393
"""Iterate over keys within the index using prefix matching.
395
Prefix matching is applied within the tuple of a key, not to within
396
the bytestring of each key element. e.g. if you have the keys ('foo',
397
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
398
only the former key is returned.
400
:param keys: An iterable providing the key prefixes to be retrieved.
401
Each key prefix takes the form of a tuple the length of a key, but
402
with the last N elements 'None' rather than a regular bytestring.
403
The first element cannot be 'None'.
404
:return: An iterable as per iter_all_entries, but restricted to the
405
keys with a matching prefix to those supplied. No additional keys
406
will be returned, and every match that is in the index will be
412
# load data - also finds key lengths
413
if self._nodes is None:
415
if self._key_length == 1:
419
raise errors.BadIndexKey(key)
420
if len(key) != self._key_length:
421
raise errors.BadIndexKey(key)
422
if self.node_ref_lists:
423
value, node_refs = self._nodes[key]
424
yield self, key, value, node_refs
426
yield self, key, self._nodes[key]
431
raise errors.BadIndexKey(key)
432
if len(key) != self._key_length:
433
raise errors.BadIndexKey(key)
434
# find what it refers to:
435
key_dict = self._nodes_by_key
437
# find the subdict whose contents should be returned.
439
while len(elements) and elements[0] is not None:
440
key_dict = key_dict[elements[0]]
443
# a non-existant lookup.
448
key_dict = dicts.pop(-1)
449
# can't be empty or would not exist
450
item, value = key_dict.iteritems().next()
451
if type(value) == dict:
453
dicts.extend(key_dict.itervalues())
456
for value in key_dict.itervalues():
457
# each value is the key:value:node refs tuple
459
yield (self, ) + value
461
# the last thing looked up was a terminal element
462
yield (self, ) + key_dict
465
"""Return an estimate of the number of keys in this index.
467
For GraphIndex the estimate is exact.
469
if self._key_count is None:
470
# really this should just read the prefix
472
return self._key_count
474
def _signature(self):
475
"""The file signature for this index type."""
479
"""Validate that everything in the index can be accessed."""
480
# iter_all validates completely at the moment, so just do that.
481
for node in self.iter_all_entries():
485
class CombinedGraphIndex(object):
486
"""A GraphIndex made up from smaller GraphIndices.
488
The backing indices must implement GraphIndex, and are presumed to be
491
Queries against the combined index will be made against the first index,
492
and then the second and so on. The order of index's can thus influence
493
performance significantly. For example, if one index is on local disk and a
494
second on a remote server, the local disk index should be before the other
498
def __init__(self, indices):
499
"""Create a CombinedGraphIndex backed by indices.
501
:param indices: An ordered list of indices to query for data.
503
self._indices = indices
507
self.__class__.__name__,
508
', '.join(map(repr, self._indices)))
510
def insert_index(self, pos, index):
511
"""Insert a new index in the list of indices to query.
513
:param pos: The position to insert the index.
514
:param index: The index to insert.
516
self._indices.insert(pos, index)
518
def iter_all_entries(self):
519
"""Iterate over all keys within the index
521
Duplicate keys across child indices are presumed to have the same
522
value and are only reported once.
524
:return: An iterable of (index, key, reference_lists, value).
525
There is no defined order for the result iteration - it will be in
526
the most efficient order for the index.
529
for index in self._indices:
530
for node in index.iter_all_entries():
531
if node[1] not in seen_keys:
533
seen_keys.add(node[1])
535
def iter_entries(self, keys):
536
"""Iterate over keys within the index.
538
Duplicate keys across child indices are presumed to have the same
539
value and are only reported once.
541
:param keys: An iterable providing the keys to be retrieved.
542
:return: An iterable of (index, key, reference_lists, value). There is no
543
defined order for the result iteration - it will be in the most
544
efficient order for the index.
547
for index in self._indices:
550
for node in index.iter_entries(keys):
554
def iter_entries_prefix(self, keys):
555
"""Iterate over keys within the index using prefix matching.
557
Duplicate keys across child indices are presumed to have the same
558
value and are only reported once.
560
Prefix matching is applied within the tuple of a key, not to within
561
the bytestring of each key element. e.g. if you have the keys ('foo',
562
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
563
only the former key is returned.
565
:param keys: An iterable providing the key prefixes to be retrieved.
566
Each key prefix takes the form of a tuple the length of a key, but
567
with the last N elements 'None' rather than a regular bytestring.
568
The first element cannot be 'None'.
569
:return: An iterable as per iter_all_entries, but restricted to the
570
keys with a matching prefix to those supplied. No additional keys
571
will be returned, and every match that is in the index will be
578
for index in self._indices:
579
for node in index.iter_entries_prefix(keys):
580
if node[1] in seen_keys:
582
seen_keys.add(node[1])
586
"""Return an estimate of the number of keys in this index.
588
For CombinedGraphIndex this is approximated by the sum of the keys of
589
the child indices. As child indices may have duplicate keys this can
590
have a maximum error of the number of child indices * largest number of
593
return sum((index.key_count() for index in self._indices), 0)
596
"""Validate that everything in the index can be accessed."""
597
for index in self._indices:
601
class InMemoryGraphIndex(GraphIndexBuilder):
602
"""A GraphIndex which operates entirely out of memory and is mutable.
604
This is designed to allow the accumulation of GraphIndex entries during a
605
single write operation, where the accumulated entries need to be immediately
606
available - for example via a CombinedGraphIndex.
609
def add_nodes(self, nodes):
610
"""Add nodes to the index.
612
:param nodes: An iterable of (key, node_refs, value) entries to add.
614
if self.reference_lists:
615
for (key, value, node_refs) in nodes:
616
self.add_node(key, value, node_refs)
618
for (key, value) in nodes:
619
self.add_node(key, value)
621
def iter_all_entries(self):
622
"""Iterate over all keys within the index
624
:return: An iterable of (index, key, reference_lists, value). There is no
625
defined order for the result iteration - it will be in the most
626
efficient order for the index (in this case dictionary hash order).
628
if 'evil' in debug.debug_flags:
629
trace.mutter_callsite(3,
630
"iter_all_entries scales with size of history.")
631
if self.reference_lists:
632
for key, (absent, references, value) in self._nodes.iteritems():
634
yield self, key, value, references
636
for key, (absent, references, value) in self._nodes.iteritems():
638
yield self, key, value
640
def iter_entries(self, keys):
641
"""Iterate over keys within the index.
643
:param keys: An iterable providing the keys to be retrieved.
644
:return: An iterable of (index, key, reference_lists, value). There is no
645
defined order for the result iteration - it will be in the most
646
efficient order for the index (keys iteration order in this case).
649
if self.reference_lists:
650
for key in keys.intersection(self._keys):
651
node = self._nodes[key]
653
yield self, key, node[2], node[1]
655
for key in keys.intersection(self._keys):
656
node = self._nodes[key]
658
yield self, key, node[2]
660
def iter_entries_prefix(self, keys):
661
"""Iterate over keys within the index using prefix matching.
663
Prefix matching is applied within the tuple of a key, not to within
664
the bytestring of each key element. e.g. if you have the keys ('foo',
665
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
666
only the former key is returned.
668
:param keys: An iterable providing the key prefixes to be retrieved.
669
Each key prefix takes the form of a tuple the length of a key, but
670
with the last N elements 'None' rather than a regular bytestring.
671
The first element cannot be 'None'.
672
:return: An iterable as per iter_all_entries, but restricted to the
673
keys with a matching prefix to those supplied. No additional keys
674
will be returned, and every match that is in the index will be
677
# XXX: To much duplication with the GraphIndex class; consider finding
678
# a good place to pull out the actual common logic.
682
if self._key_length == 1:
686
raise errors.BadIndexKey(key)
687
if len(key) != self._key_length:
688
raise errors.BadIndexKey(key)
689
node = self._nodes[key]
692
if self.reference_lists:
693
yield self, key, node[2], node[1]
695
yield self, key, node[2]
700
raise errors.BadIndexKey(key)
701
if len(key) != self._key_length:
702
raise errors.BadIndexKey(key)
703
# find what it refers to:
704
key_dict = self._nodes_by_key
706
# find the subdict to return
708
while len(elements) and elements[0] is not None:
709
key_dict = key_dict[elements[0]]
712
# a non-existant lookup.
717
key_dict = dicts.pop(-1)
718
# can't be empty or would not exist
719
item, value = key_dict.iteritems().next()
720
if type(value) == dict:
722
dicts.extend(key_dict.itervalues())
725
for value in key_dict.itervalues():
726
yield (self, ) + value
728
yield (self, ) + key_dict
731
"""Return an estimate of the number of keys in this index.
733
For InMemoryGraphIndex the estimate is exact.
735
return len(self._keys)
738
"""In memory index's have no known corruption at the moment."""
741
class GraphIndexPrefixAdapter(object):
742
"""An adapter between GraphIndex with different key lengths.
744
Queries against this will emit queries against the adapted Graph with the
745
prefix added, queries for all items use iter_entries_prefix. The returned
746
nodes will have their keys and node references adjusted to remove the
747
prefix. Finally, an add_nodes_callback can be supplied - when called the
748
nodes and references being added will have prefix prepended.
751
def __init__(self, adapted, prefix, missing_key_length,
752
add_nodes_callback=None):
753
"""Construct an adapter against adapted with prefix."""
754
self.adapted = adapted
755
self.prefix_key = prefix + (None,)*missing_key_length
757
self.prefix_len = len(prefix)
758
self.add_nodes_callback = add_nodes_callback
760
def add_nodes(self, nodes):
761
"""Add nodes to the index.
763
:param nodes: An iterable of (key, node_refs, value) entries to add.
765
# save nodes in case its an iterator
767
translated_nodes = []
769
# Add prefix_key to each reference node_refs is a tuple of tuples,
770
# so split it apart, and add prefix_key to the internal reference
771
for (key, value, node_refs) in nodes:
772
adjusted_references = (
773
tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
774
for ref_list in node_refs))
775
translated_nodes.append((self.prefix + key, value,
776
adjusted_references))
778
# XXX: TODO add an explicit interface for getting the reference list
779
# status, to handle this bit of user-friendliness in the API more
781
for (key, value) in nodes:
782
translated_nodes.append((self.prefix + key, value))
783
self.add_nodes_callback(translated_nodes)
785
def add_node(self, key, value, references=()):
786
"""Add a node to the index.
788
:param key: The key. keys are non-empty tuples containing
789
as many whitespace-free utf8 bytestrings as the key length
790
defined for this index.
791
:param references: An iterable of iterables of keys. Each is a
792
reference to another key.
793
:param value: The value to associate with the key. It may be any
794
bytes as long as it does not contain \0 or \n.
796
self.add_nodes(((key, value, references), ))
798
def _strip_prefix(self, an_iter):
799
"""Strip prefix data from nodes and return it."""
802
if node[1][:self.prefix_len] != self.prefix:
803
raise errors.BadIndexData(self)
804
for ref_list in node[3]:
805
for ref_node in ref_list:
806
if ref_node[:self.prefix_len] != self.prefix:
807
raise errors.BadIndexData(self)
808
yield node[0], node[1][self.prefix_len:], node[2], (
809
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
810
for ref_list in node[3]))
812
def iter_all_entries(self):
813
"""Iterate over all keys within the index
815
iter_all_entries is implemented against the adapted index using
818
:return: An iterable of (index, key, reference_lists, value). There is no
819
defined order for the result iteration - it will be in the most
820
efficient order for the index (in this case dictionary hash order).
822
return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))
824
def iter_entries(self, keys):
825
"""Iterate over keys within the index.
827
:param keys: An iterable providing the keys to be retrieved.
828
:return: An iterable of (key, reference_lists, value). There is no
829
defined order for the result iteration - it will be in the most
830
efficient order for the index (keys iteration order in this case).
832
return self._strip_prefix(self.adapted.iter_entries(
833
self.prefix + key for key in keys))
835
def iter_entries_prefix(self, keys):
836
"""Iterate over keys within the index using prefix matching.
838
Prefix matching is applied within the tuple of a key, not to within
839
the bytestring of each key element. e.g. if you have the keys ('foo',
840
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
841
only the former key is returned.
843
:param keys: An iterable providing the key prefixes to be retrieved.
844
Each key prefix takes the form of a tuple the length of a key, but
845
with the last N elements 'None' rather than a regular bytestring.
846
The first element cannot be 'None'.
847
:return: An iterable as per iter_all_entries, but restricted to the
848
keys with a matching prefix to those supplied. No additional keys
849
will be returned, and every match that is in the index will be
852
return self._strip_prefix(self.adapted.iter_entries_prefix(
853
self.prefix + key for key in keys))
856
"""Return an estimate of the number of keys in this index.
858
For GraphIndexPrefixAdapter this is relatively expensive - key
859
iteration with the prefix is done.
861
return len(list(self.iter_all_entries()))
864
"""Call the adapted's validate."""
865
self.adapted.validate()