bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
1  | 
# Copyright (C) 2007 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""Indexing facilities."""
 | 
|
18  | 
||
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
19  | 
# Public names exported by ``from <this module> import *``.
__all__ = [
    'CombinedGraphIndex',
    'GraphIndex',
    'GraphIndexBuilder',
    'InMemoryGraphIndex',
    ]
 | 
|
| 
2592.1.32
by Robert Collins
 Add __all__ to index.  | 
25  | 
|
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
26  | 
from cStringIO import StringIO  | 
| 
2592.1.12
by Robert Collins
 Handle basic node adds.  | 
27  | 
import re  | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
28  | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
29  | 
from bzrlib import errors  | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
30  | 
|
| 
2624.2.8
by Robert Collins
 Explicitly mark the number of keys elements in use in GraphIndex files.  | 
31  | 
# Prefixes of the two option lines that follow the signature in an index file.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_NODE_REFS = "node_ref_lists="
# First line of every serialised index.
_SIGNATURE = "Bazaar Graph Index 1\n"


# Characters that may not appear inside a key element.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Characters that may not appear inside a value: they delimit fields and rows.
_newline_null_re = re.compile('[\n\0]')
38  | 
||
39  | 
||
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
40  | 
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
                      'key_elements=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
    KEY_ELEMENT    := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """
        self.reference_lists = reference_lists
        # key -> (absent marker, reference lists, value)
        self._nodes = {}
        # nested dicts keyed by successive key elements; only populated when
        # keys have more than one element.
        self._nodes_by_key = {}
        self._key_length = key_elements

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if not isinstance(key, tuple):
            raise errors.BadIndexKey(key)
        if self._key_length != len(key):
            raise errors.BadIndexKey(key)
        for element in key:
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        Nothing is recorded unless every check passes, so a rejected call
        leaves the builder unchanged.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises BadIndexKey: if key, or any referenced key, is not valid.
        :raises BadIndexValue: if value contains \\0 or \\n, or the number
            of reference lists differs from the builder's reference_lists.
        :raises BadIndexDuplicateKey: if key has already been added.
        """
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        reference_lists = tuple(references)
        if len(reference_lists) != self.reference_lists:
            raise errors.BadIndexValue(references)
        # Materialise each list exactly once: a one-shot iterable would be
        # exhausted by validation and then stored as an empty tuple.
        node_refs = tuple([tuple(ref_list) for ref_list in reference_lists])
        for ref_list in node_refs:
            for reference in ref_list:
                self._check_key(reference)
        if key in self._nodes and self._nodes[key][0] == '':
            raise errors.BadIndexDuplicateKey(key, self)
        # Every check has passed; only now touch the builder's state.
        for ref_list in node_refs:
            for reference in ref_list:
                if reference not in self._nodes:
                    # placeholder so the reference has an address to point at
                    self._nodes[reference] = ('a', (), '')
        self._nodes[key] = ('', node_refs, value)
        if self._key_length > 1:
            key_dict = self._nodes_by_key
            if self.reference_lists:
                key_value = key, value, node_refs
            else:
                key_value = key, value
            # possibly should do this on-demand, but it seems likely it is
            # always wanted
            # For a key of (foo, bar, baz) create
            # _nodes_by_key[foo][bar][baz] = key_value
            for subkey in key[:-1]:
                key_dict = key_dict.setdefault(subkey, {})
            key_dict[key[-1]] = key_value

    def _resolve_key_addresses(self, nodes, prefix_length):
        """Work out the byte offset at which each node will be serialised.

        References are byte offsets. To avoid polynomial work resolving them
        (a reference to later in the file cannot be determined until all the
        references in between are sized) every offset is zero padded to the
        same width.

        :param nodes: The sorted (key, (absent, references, value)) items.
        :param prefix_length: Bytes used by the signature and option lines.
        :return: (digits, key_addresses, expected_bytes): the padded width of
            a reference, a dict mapping key to byte offset, and the total
            length the serialised index should have.
        """
        key_offset_info = []
        non_ref_bytes = prefix_length
        total_references = 0
        for key, (absent, references, value) in nodes:
            # record the offset known *so far* for this key: the non
            # reference bytes to date, and the total references to date.
            key_offset_info.append((key, non_ref_bytes, total_references))
            # key is a variable length tuple, \x00 between elements
            non_ref_bytes += sum(len(element) for element in key)
            if self._key_length > 1:
                non_ref_bytes += self._key_length - 1
            # value is literal bytes, there are 3 null's, 1 NL.
            non_ref_bytes += len(value) + 3 + 1
            if absent:
                # one byte for the absent marker; absent nodes carry no
                # reference lists and therefore no separators.
                non_ref_bytes += 1
            else:
                # (ref_lists - 1) tabs
                non_ref_bytes += self.reference_lists - 1
                for ref_list in references:
                    # how many references across the whole file?
                    total_references += len(ref_list)
                    # (refs - 1) cr's per ref_list
                    if ref_list:
                        non_ref_bytes += len(ref_list) - 1
        # how many digits are needed to represent the total byte count?
        digits = 1
        possible_total_bytes = non_ref_bytes + total_references*digits
        while 10 ** digits < possible_total_bytes:
            digits += 1
            possible_total_bytes = non_ref_bytes + total_references*digits
        expected_bytes = possible_total_bytes + 1 # terminating newline
        key_addresses = {}
        for key, key_non_ref_bytes, key_references in key_offset_info:
            key_addresses[key] = key_non_ref_bytes + key_references*digits
        return digits, key_addresses, expected_bytes

    def finish(self):
        """Serialise the nodes added so far.

        :return: A StringIO holding the complete index.
        :raises BzrError: if the serialised length differs from the length
            predicted while calculating reference offsets.
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        nodes = sorted(self._nodes.items())
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        key_addresses = {}
        format_string = '%d'
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            digits, key_addresses, expected_bytes = \
                self._resolve_key_addresses(nodes, prefix_length)
            format_string = '%%0%sd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = [format_string % key_addresses[reference]
                                 for reference in ref_list]
                flattened_references.append('\r'.join(ref_addresses))
            string_key = '\x00'.join(key)
            lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        output = ''.join(lines)
        if expected_bytes and len(output) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(output), expected_bytes))
        return StringIO(output)
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
205  | 
|
206  | 
||
207  | 
class GraphIndex(object):  | 
|
208  | 
"""An index for data with embedded graphs.  | 
|
| 
2592.1.10
by Robert Collins
 Make validate detect node reference parsing errors.  | 
209  | 
 
 | 
210  | 
    The index maps keys to a list of key reference lists, and a value.
 | 
|
211  | 
    Each node has the same number of key reference lists. Each key reference
 | 
|
212  | 
    list can be empty or an arbitrary length. The value is an opaque NULL
 | 
|
| 
2592.1.45
by Robert Collins
 Tweak documentation as per Aaron's review.  | 
213  | 
    terminated string without any newlines. The storage of the index is 
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
214  | 
    hidden in the interface: keys and key references are always tuples of
 | 
215  | 
    bytestrings, never the internal representation (e.g. dictionary offsets).
 | 
|
| 
2592.1.30
by Robert Collins
 Absent entries are not yeilded.  | 
216  | 
|
217  | 
    It is presumed that the index will not be mutated - it is static data.
 | 
|
| 
2592.1.34
by Robert Collins
 Cleanup docs.  | 
218  | 
|
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
219  | 
    Successive iter_all_entries calls will read the entire index each time.
 | 
220  | 
    Additionally, iter_entries calls will read the index linearly until the
 | 
|
221  | 
    desired keys are found. XXX: This must be fixed before the index is
 | 
|
| 
2592.1.34
by Robert Collins
 Cleanup docs.  | 
222  | 
    suitable for production use. :XXX
 | 
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
223  | 
    """
 | 
224  | 
||
225  | 
def __init__(self, transport, name):  | 
|
226  | 
"""Open an index called name on transport.  | 
|
227  | 
||
228  | 
        :param transport: A bzrlib.transport.Transport.
 | 
|
229  | 
        :param name: A path to provide to transport API calls.
 | 
|
230  | 
        """
 | 
|
231  | 
self._transport = transport  | 
|
232  | 
self._name = name  | 
|
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
233  | 
self._nodes = None  | 
234  | 
self._keys_by_offset = None  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
235  | 
self._nodes_by_key = None  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
236  | 
|
237  | 
def _buffer_all(self):  | 
|
238  | 
"""Buffer all the index data.  | 
|
239  | 
||
240  | 
        Mutates self._nodes and self.keys_by_offset.
 | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
241  | 
        """
 | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
242  | 
stream = self._transport.get(self._name)  | 
243  | 
self._read_prefix(stream)  | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
244  | 
expected_elements = 3 + self._key_length  | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
245  | 
line_count = 0  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
246  | 
        # raw data keyed by offset
 | 
247  | 
self._keys_by_offset = {}  | 
|
248  | 
        # ready-to-return key:value or key:value, node_ref_lists
 | 
|
249  | 
self._nodes = {}  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
250  | 
self._nodes_by_key = {}  | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
251  | 
trailers = 0  | 
252  | 
pos = stream.tell()  | 
|
253  | 
for line in stream.readlines():  | 
|
254  | 
if line == '\n':  | 
|
255  | 
trailers += 1  | 
|
256  | 
                continue
 | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
257  | 
elements = line.split('\0')  | 
| 
2624.2.11
by Robert Collins
 Review comments.  | 
258  | 
if len(elements) != expected_elements:  | 
259  | 
raise errors.BadIndexData(self)  | 
|
| 
2624.2.5
by Robert Collins
 Change bzrlib.index.Index keys to be 1-tuples, not strings.  | 
260  | 
            # keys are tuples
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
261  | 
key = tuple(elements[:self._key_length])  | 
262  | 
absent, references, value = elements[-3:]  | 
|
| 
2592.1.43
by Robert Collins
 Various index tweaks and test clarity from John's review.  | 
263  | 
value = value[:-1] # remove the newline  | 
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
264  | 
ref_lists = []  | 
265  | 
for ref_string in references.split('\t'):  | 
|
266  | 
ref_lists.append(tuple([  | 
|
267  | 
int(ref) for ref in ref_string.split('\r') if ref  | 
|
268  | 
                    ]))
 | 
|
269  | 
ref_lists = tuple(ref_lists)  | 
|
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
270  | 
self._keys_by_offset[pos] = (key, absent, ref_lists, value)  | 
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
271  | 
pos += len(line)  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
272  | 
for key, absent, references, value in self._keys_by_offset.itervalues():  | 
| 
2592.1.30
by Robert Collins
 Absent entries are not yeilded.  | 
273  | 
if absent:  | 
274  | 
                continue
 | 
|
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
275  | 
            # resolve references:
 | 
276  | 
if self.node_ref_lists:  | 
|
277  | 
node_refs = []  | 
|
278  | 
for ref_list in references:  | 
|
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
279  | 
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
280  | 
node_value = (value, tuple(node_refs))  | 
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
281  | 
else:  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
282  | 
node_value = value  | 
283  | 
self._nodes[key] = node_value  | 
|
284  | 
if self._key_length > 1:  | 
|
285  | 
subkey = list(reversed(key[:-1]))  | 
|
286  | 
key_dict = self._nodes_by_key  | 
|
287  | 
if self.node_ref_lists:  | 
|
288  | 
key_value = key, node_value[0], node_value[1]  | 
|
289  | 
else:  | 
|
290  | 
key_value = key, node_value  | 
|
291  | 
                # possibly should do this on-demand, but it seems likely it is 
 | 
|
292  | 
                # always wanted
 | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
293  | 
                # For a key of (foo, bar, baz) create
 | 
294  | 
                # _nodes_by_key[foo][bar][baz] = key_value
 | 
|
295  | 
for subkey in key[:-1]:  | 
|
296  | 
key_dict = key_dict.setdefault(subkey, {})  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
297  | 
key_dict[key[-1]] = key_value  | 
| 
2624.2.6
by Robert Collins
 Remove performance overhead of set intersection against dicts in index iteraction.  | 
298  | 
self._keys = set(self._nodes)  | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
299  | 
if trailers != 1:  | 
300  | 
            # there must be one line - the empty trailer line.
 | 
|
301  | 
raise errors.BadIndexData(self)  | 
|
302  | 
||
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
303  | 
def iter_all_entries(self):  | 
304  | 
"""Iterate over all keys within the index.  | 
|
305  | 
||
306  | 
        :return: An iterable of (key, value) or (key, value, reference_lists).
 | 
|
307  | 
            The former tuple is used when there are no reference lists in the
 | 
|
308  | 
            index, making the API compatible with simple key:value index types.
 | 
|
309  | 
            There is no defined order for the result iteration - it will be in
 | 
|
310  | 
            the most efficient order for the index.
 | 
|
311  | 
        """
 | 
|
312  | 
if self._nodes is None:  | 
|
313  | 
self._buffer_all()  | 
|
314  | 
if self.node_ref_lists:  | 
|
315  | 
for key, (value, node_ref_lists) in self._nodes.iteritems():  | 
|
316  | 
yield key, value, node_ref_lists  | 
|
317  | 
else:  | 
|
318  | 
for key, value in self._nodes.iteritems():  | 
|
319  | 
yield key, value  | 
|
320  | 
||
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
321  | 
def _read_prefix(self, stream):  | 
322  | 
signature = stream.read(len(self._signature()))  | 
|
323  | 
if not signature == self._signature():  | 
|
324  | 
raise errors.BadIndexFormatSignature(self._name, GraphIndex)  | 
|
325  | 
options_line = stream.readline()  | 
|
326  | 
if not options_line.startswith(_OPTION_NODE_REFS):  | 
|
327  | 
raise errors.BadIndexOptions(self)  | 
|
328  | 
try:  | 
|
329  | 
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])  | 
|
330  | 
except ValueError:  | 
|
331  | 
raise errors.BadIndexOptions(self)  | 
|
| 
2624.2.8
by Robert Collins
 Explicitly mark the number of keys elements in use in GraphIndex files.  | 
332  | 
options_line = stream.readline()  | 
333  | 
if not options_line.startswith(_OPTION_KEY_ELEMENTS):  | 
|
334  | 
raise errors.BadIndexOptions(self)  | 
|
335  | 
try:  | 
|
336  | 
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])  | 
|
337  | 
except ValueError:  | 
|
338  | 
raise errors.BadIndexOptions(self)  | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
339  | 
|
340  | 
def iter_entries(self, keys):  | 
|
341  | 
"""Iterate over keys within the index.  | 
|
342  | 
||
343  | 
        :param keys: An iterable providing the keys to be retrieved.
 | 
|
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
344  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
345  | 
            keys supplied. No additional keys will be returned, and every
 | 
|
346  | 
            key supplied that is in the index will be returned.
 | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
347  | 
        """
 | 
| 
2592.1.29
by Robert Collins
 Basic iter_entries working.  | 
348  | 
keys = set(keys)  | 
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
349  | 
if not keys:  | 
350  | 
            return
 | 
|
| 
2624.2.3
by Robert Collins
 Make GraphIndex.iter_entries do hash lookups rather than table scans.  | 
351  | 
if self._nodes is None:  | 
352  | 
self._buffer_all()  | 
|
| 
2624.2.6
by Robert Collins
 Remove performance overhead of set intersection against dicts in index iteraction.  | 
353  | 
keys = keys.intersection(self._keys)  | 
| 
2624.2.3
by Robert Collins
 Make GraphIndex.iter_entries do hash lookups rather than table scans.  | 
354  | 
if self.node_ref_lists:  | 
355  | 
for key in keys:  | 
|
356  | 
value, node_refs = self._nodes[key]  | 
|
357  | 
yield key, value, node_refs  | 
|
358  | 
else:  | 
|
359  | 
for key in keys:  | 
|
360  | 
yield key, self._nodes[key]  | 
|
| 
2592.1.7
by Robert Collins
 A validate that goes boom.  | 
361  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
362  | 
def iter_entries_prefix(self, keys):  | 
363  | 
"""Iterate over keys within the index using prefix matching.  | 
|
364  | 
||
365  | 
        Prefix matching is applied within the tuple of a key, not to within
 | 
|
366  | 
        the bytestring of each key element. e.g. if you have the keys ('foo',
 | 
|
367  | 
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | 
|
368  | 
        only the former key is returned.
 | 
|
369  | 
||
370  | 
        :param keys: An iterable providing the key prefixes to be retrieved.
 | 
|
371  | 
            Each key prefix takes the form of a tuple the length of a key, but
 | 
|
372  | 
            with the last N elements 'None' rather than a regular bytestring.
 | 
|
373  | 
            The first element cannot be 'None'.
 | 
|
374  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
|
375  | 
            keys with a matching prefix to those supplied. No additional keys
 | 
|
376  | 
            will be returned, and every match that is in the index will be
 | 
|
377  | 
            returned.
 | 
|
378  | 
        """
 | 
|
379  | 
keys = set(keys)  | 
|
380  | 
if not keys:  | 
|
381  | 
            return
 | 
|
382  | 
        # load data - also finds key lengths
 | 
|
383  | 
if self._nodes is None:  | 
|
384  | 
self._buffer_all()  | 
|
385  | 
if self._key_length == 1:  | 
|
386  | 
for key in keys:  | 
|
387  | 
                # sanity check
 | 
|
388  | 
if key[0] is None:  | 
|
389  | 
raise errors.BadIndexKey(key)  | 
|
390  | 
if len(key) != self._key_length:  | 
|
391  | 
raise errors.BadIndexKey(key)  | 
|
392  | 
if self.node_ref_lists:  | 
|
393  | 
value, node_refs = self._nodes[key]  | 
|
394  | 
yield key, value, node_refs  | 
|
395  | 
else:  | 
|
396  | 
yield key, self._nodes[key]  | 
|
397  | 
            return
 | 
|
398  | 
for key in keys:  | 
|
399  | 
            # sanity check
 | 
|
400  | 
if key[0] is None:  | 
|
401  | 
raise errors.BadIndexKey(key)  | 
|
402  | 
if len(key) != self._key_length:  | 
|
403  | 
raise errors.BadIndexKey(key)  | 
|
404  | 
            # find what it refers to:
 | 
|
405  | 
key_dict = self._nodes_by_key  | 
|
406  | 
elements = list(key)  | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
407  | 
            # find the subdict whose contents should be returned.
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
408  | 
try:  | 
409  | 
while len(elements) and elements[0] is not None:  | 
|
410  | 
key_dict = key_dict[elements[0]]  | 
|
411  | 
elements.pop(0)  | 
|
412  | 
except KeyError:  | 
|
413  | 
                # a non-existant lookup.
 | 
|
414  | 
                continue
 | 
|
415  | 
if len(elements):  | 
|
416  | 
dicts = [key_dict]  | 
|
417  | 
while dicts:  | 
|
418  | 
key_dict = dicts.pop(-1)  | 
|
419  | 
                    # can't be empty or would not exist
 | 
|
420  | 
item, value = key_dict.iteritems().next()  | 
|
421  | 
if type(value) == dict:  | 
|
422  | 
                        # push keys 
 | 
|
423  | 
dicts.extend(key_dict.itervalues())  | 
|
424  | 
else:  | 
|
425  | 
                        # yield keys
 | 
|
426  | 
for value in key_dict.itervalues():  | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
427  | 
                            # each value is the key:value:node refs tuple
 | 
428  | 
                            # ready to yield.
 | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
429  | 
yield value  | 
430  | 
else:  | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
431  | 
                # the last thing looked up was a terminal element
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
432  | 
yield key_dict  | 
433  | 
||
| 
2592.1.8
by Robert Collins
 Empty files should validate ok.  | 
434  | 
def _signature(self):
    """Return the file signature used by this index type.

    :return: The module-level _SIGNATURE constant.
    """
    return _SIGNATURE
|
437  | 
||
| 
2592.1.7
by Robert Collins
 A validate that goes boom.  | 
438  | 
def validate(self):
    """Validate that everything in the index can be accessed.

    Walking iter_all_entries is a complete validation at the moment, so
    every entry is simply pulled from it and thrown away.
    """
    all_entries = self.iter_all_entries()
    for _unused_entry in all_entries:
        pass
 | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
443  | 
|
444  | 
||
445  | 
class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries against the combined index will be made against the first index,
    and then the second and so on. The order of the indices can thus
    influence performance significantly. For example, if one index is on
    local disk and a second on a remote server, the local disk index should
    be before the other in the index list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data. The
            list object is stored as-is (not copied), so insert_index
            modifies the list that was passed in.
        """
        self._indices = indices

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :return: An iterable of nodes, passed through exactly as the backing
            indices produce them; element 0 of each node is its key. There
            is no defined order for the result iteration - it will be in
            the most efficient order for the index.
        """
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_all_entries():
                if node[0] not in seen_keys:
                    yield node
                    seen_keys.add(node[0])

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once: a key found in one index is not
        requested from the indices that follow it.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of nodes, passed through exactly as the backing
            indices produce them; element 0 of each node is its key. There
            is no defined order for the result iteration - it will be in
            the most efficient order for the index.
        """
        keys = set(keys)
        for index in self._indices:
            if not keys:
                # Everything requested has been found; skip the rest.
                return
            # The child may still be iterating over ``keys`` lazily while we
            # consume its results, so the set must not shrink until the
            # child iterator is exhausted.
            found = []
            for node in index.iter_entries(keys):
                found.append(node[0])
                yield node
            keys.difference_update(found)

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        keys = set(keys)
        if not keys:
            return
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_entries_prefix(keys):
                if node[0] in seen_keys:
                    continue
                seen_keys.add(node[0])
                yield node

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for index in self._indices:
            index.validate()
|
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
544  | 
|
545  | 
||
546  | 
class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be immediately
    available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of entries to add. Each entry is
            (key, value, node_refs) when the index has reference lists, and
            (key, value) otherwise.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Nodes flagged as absent are not reported.

        :return: An iterable of (key, value, references) when the index has
            reference lists, or of (key, value) otherwise. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash
            order).
        """
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Keys that are not in the index, or whose node is flagged as absent,
        are silently skipped.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (key, value, references) when the index has
            reference lists, or of (key, value) otherwise. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this
            case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._nodes):
                # node is (absent, references, value)
                node = self._nodes[key]
                if not node[0]:
                    yield key, node[2], node[1]
        else:
            for key in keys.intersection(self._nodes):
                node = self._nodes[key]
                if not node[0]:
                    yield key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned. Prefixes that match nothing yield nothing.
        :raises BadIndexKey: (while iterating) if a supplied key is not
            exactly the index's key length, or its first element is None.
        """
        # XXX: Too much duplication with the GraphIndex class; consider
        # finding a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        key_length = self._key_length
        if key_length == 1:
            # With one-element keys a prefix is always a complete key, so
            # this is a plain lookup.
            for key in keys:
                # sanity check; the length is tested first so that an empty
                # key is reported as a bad key rather than an IndexError.
                if len(key) != key_length or key[0] is None:
                    raise errors.BadIndexKey(key)
                # node is (absent, references, value); a key that is not in
                # the index simply matches nothing.
                node = self._nodes.get(key)
                if node is None or node[0]:
                    continue
                if self.reference_lists:
                    yield key, node[2], node[1]
                else:
                    yield key, node[2]
            return
        for key in keys:
            # sanity check
            if len(key) != key_length or key[0] is None:
                raise errors.BadIndexKey(key)
            # Descend through the nested dicts for each leading element that
            # is not None.
            subtree = self._nodes_by_key
            depth = 0
            try:
                while depth < key_length and key[depth] is not None:
                    subtree = subtree[key[depth]]
                    depth += 1
            except KeyError:
                # a non-existent lookup.
                continue
            if depth == key_length:
                # every element was given, so the last thing looked up was a
                # terminal element rather than a dict of further elements.
                yield subtree
                continue
            # Walk everything below the matched prefix.
            pending = [subtree]
            while pending:
                current = pending.pop()
                # can't be empty or would not exist
                sample = current.itervalues().next()
                if isinstance(sample, dict):
                    # another level of key elements: push the child dicts
                    pending.extend(current.itervalues())
                else:
                    # leaf level: the values are the entries to report
                    for entry in current.itervalues():
                        yield entry

    def validate(self):
        """In-memory indices have no known corruption at the moment."""