bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
1 |
# Copyright (C) 2007 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Indexing facilities."""
|
|
18 |
||
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
19 |
# Public names exported by this module.
__all__ = [
    'CombinedGraphIndex',
    'GraphIndex',
    'GraphIndexBuilder',
    'GraphIndexPrefixAdapter',
    'InMemoryGraphIndex',
    ]
|
|
2592.1.32
by Robert Collins
Add __all__ to index. |
26 |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
27 |
from cStringIO import StringIO |
2592.1.12
by Robert Collins
Handle basic node adds. |
28 |
import re |
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
29 |
|
2624.2.15
by Robert Collins
Add useful -Dindex flag. |
30 |
from bzrlib.lazy_import import lazy_import |
31 |
lazy_import(globals(), """ |
|
2745.1.2
by Robert Collins
Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. |
32 |
from bzrlib import trace
|
33 |
from bzrlib.trace import mutter
|
|
2624.2.15
by Robert Collins
Add useful -Dindex flag. |
34 |
""") |
35 |
from bzrlib import debug, errors |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
36 |
|
2624.2.8
by Robert Collins
Explicitly mark the number of keys elements in use in GraphIndex files. |
37 |
# Prefixes of the option lines that follow the signature in a serialised
# index. Each prefix is followed by a decimal integer and a newline.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_LEN = "len="
_OPTION_NODE_REFS = "node_ref_lists="
# First line written to a serialised index.
_SIGNATURE = "Bazaar Graph Index 1\n"


# Matches any character that is not permitted inside a key element
# (whitespace and NUL).
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches the characters that are not permitted inside a node value
# (newline and NUL), as they delimit records and fields.
_newline_null_re = re.compile('[\n\0]')
45 |
||
46 |
||
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
47 |
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
                      'key_elements=' DIGITS NEWLINE
                      'len=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
    KEY_ELEMENT    := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes

    Absent nodes (keys that were referenced but never added) are written
    with an empty REFERENCES field and an empty VALUE. The 'len=' option
    counts only the keys that were added with add_node.
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """
        self.reference_lists = reference_lists
        # keys explicitly added via add_node (absent placeholders excluded).
        self._keys = set()
        # key -> (absent_marker, reference_lists, value); absent_marker is
        # '' for real nodes and 'a' for referenced-but-not-added keys.
        self._nodes = {}
        # nested dicts, one level per key element; only filled for keys with
        # more than one element.
        self._nodes_by_key = {}
        self._key_length = key_elements

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index.

        A valid key is a tuple with exactly as many elements as this index
        was created with, each element being non-empty and free of
        whitespace and NUL characters.
        """
        if type(key) is not tuple:
            raise errors.BadIndexKey(key)
        if self._key_length != len(key):
            raise errors.BadIndexKey(key)
        for element in key:
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        All validation is performed before the builder is modified, so a
        call that raises leaves the builder unchanged.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain NUL or newline.
        :raises BadIndexKey: if key, or any referenced key, is not valid.
        :raises BadIndexValue: if value contains a forbidden character, or
            if the number of reference lists does not match the number this
            builder was created with.
        :raises BadIndexDuplicateKey: if key has already been added.
        """
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
            # Materialise first: a one-shot iterator would otherwise be
            # exhausted by validation and recorded as an empty list.
            reference_list = tuple(reference_list)
            for reference in reference_list:
                self._check_key(reference)
                if reference not in self._nodes:
                    absent_references.append(reference)
            node_refs.append(reference_list)
        node_refs = tuple(node_refs)
        if key in self._nodes and self._nodes[key][0] == '':
            raise errors.BadIndexDuplicateKey(key, self)
        # Everything validated: now it is safe to mutate. Placeholders go in
        # first so that a self-referencing key is overwritten by the real
        # node below.
        for reference in absent_references:
            self._nodes[reference] = ('a', (), '')
        self._nodes[key] = ('', node_refs, value)
        self._keys.add(key)
        if self._key_length > 1:
            key_dict = self._nodes_by_key
            if self.reference_lists:
                key_value = key, value, node_refs
            else:
                key_value = key, value
            # possibly should do this on-demand, but it seems likely it is
            # always wanted
            # For a key of (foo, bar, baz) create
            # _nodes_by_key[foo][bar][baz] = key_value
            for subkey in key[:-1]:
                key_dict = key_dict.setdefault(subkey, {})
            key_dict[key[-1]] = key_value

    def finish(self):
        """Serialise the index.

        :return: A StringIO holding the complete serialised index.
        :raises BzrError: if reference lists are in use and the serialised
            length differs from the length computed during the offset
            pre-pass (an internal consistency check).
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # file cannot be determined until all the inbetween references have
        # been calculated too) we pad the offsets with 0's to make them be
        # of consistent length. Using binary offsets would break the trivial
        # file parsing.
        # to calculate the width of zero's needed we do three passes:
        # one to gather all the non-reference data and the number of
        # references.
        # one to pad all the data with reference-length and determine entry
        # addresses.
        # One to serialise.

        # forward sorted by key. In future we may consider topological
        # sorting, at the cost of table scans for direct lookup, or a second
        # index for direct lookup
        nodes = sorted(self._nodes.items())
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # record the offset known *so far* for this key:
                # the non reference bytes to date, and the total references
                # to date - saves reaccumulating on the second pass
                key_offset_info.append((key, non_ref_bytes, total_references))
                # key is a variable length tuple, \x00 between elements
                non_ref_bytes += sum(len(element) for element in key)
                if self._key_length > 1:
                    non_ref_bytes += self._key_length - 1
                # value is literal bytes, there are 3 null's, 1 NL.
                non_ref_bytes += len(value) + 3 + 1
                if absent:
                    # one byte for the absent marker; absent nodes carry no
                    # reference lists and therefore no tabs.
                    non_ref_bytes += 1
                else:
                    # (ref_lists - 1) tabs
                    non_ref_bytes += self.reference_lists - 1
                    # (ref - 1) cr's per ref_list
                    for ref_list in references:
                        # how many references across the whole file?
                        total_references += len(ref_list)
                        # accrue reference separators
                        if ref_list:
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            # Widening the references grows the file, so iterate until the
            # width is sufficient for the resulting size.
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1 # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, bytes_before, references_before in key_offset_info:
                key_addresses[key] = bytes_before + references_before*digits
            # zero padded, fixed width reference format
            format_string = '%%0%sd' % digits
        # serialise
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(
                        format_string % key_addresses[reference])
                flattened_references.append('\r'.join(ref_addresses))
            string_key = '\x00'.join(key)
            lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        # Join once and reuse the result for both the check and the return.
        data = ''.join(lines)
        if expected_bytes and len(data) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(data), expected_bytes))
        return StringIO(data)
2592.1.5
by Robert Collins
Trivial index reading. |
215 |
|
216 |
||
217 |
class GraphIndex(object): |
|
218 |
"""An index for data with embedded graphs. |
|
2592.1.10
by Robert Collins
Make validate detect node reference parsing errors. |
219 |
|
220 |
The index maps keys to a list of key reference lists, and a value.
|
|
221 |
Each node has the same number of key reference lists. Each key reference
|
|
222 |
list can be empty or an arbitrary length. The value is an opaque NULL
|
|
2592.1.45
by Robert Collins
Tweak documentation as per Aaron's review. |
223 |
terminated string without any newlines. The storage of the index is
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
224 |
hidden in the interface: keys and key references are always tuples of
|
225 |
bytestrings, never the internal representation (e.g. dictionary offsets).
|
|
2592.1.30
by Robert Collins
Absent entries are not yeilded. |
226 |
|
227 |
It is presumed that the index will not be mutated - it is static data.
|
|
2592.1.34
by Robert Collins
Cleanup docs. |
228 |
|
2592.1.44
by Robert Collins
Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review. |
229 |
Successive iter_all_entries calls will read the entire index each time.
|
230 |
Additionally, iter_entries calls will read the index linearly until the
|
|
231 |
desired keys are found. XXX: This must be fixed before the index is
|
|
2592.1.34
by Robert Collins
Cleanup docs. |
232 |
suitable for production use. :XXX
|
2592.1.5
by Robert Collins
Trivial index reading. |
233 |
"""
|
234 |
||
235 |
def __init__(self, transport, name):
    """Open an index called name on transport.

    Only the location is recorded here; every cache attribute starts out
    as None.

    :param transport: A bzrlib.transport.Transport.
    :param name: A path to provide to transport API calls.
    """
    self._transport, self._name = transport, name
    # Caches, all unset until the index data has been buffered.
    self._nodes = self._key_count = None
    self._keys_by_offset = self._nodes_by_key = None
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
247 |
|
248 |
def _buffer_all(self):
    """Buffer all the index data.

    Reads the whole index through the transport, parses every node line,
    then resolves the byte-offset references into keys.

    Mutates self._keys_by_offset, self._nodes, self._nodes_by_key and
    self._keys (plus whatever self._read_prefix sets while parsing the
    header).

    :raises BadIndexData: if a node line does not split into the expected
        number of NUL separated fields, or if the data does not contain
        exactly one empty trailer line.
    """
    if 'index' in debug.debug_flags:
        mutter('Reading entire index %s', self._transport.abspath(self._name))
    stream = self._transport.get(self._name)
    self._read_prefix(stream)
    # each line is: key elements, absent marker, references, value
    expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # ready-to-return key:value or key:value, node_ref_lists
    self._nodes = {}
    self._nodes_by_key = {}
    trailers = 0
    # references are byte offsets from the start of the file, so track the
    # offset of each line as it is read.
    pos = stream.tell()
    for line in stream.readlines():
        if line == '\n':
            trailers += 1
            continue
        elements = line.split('\0')
        if len(elements) != expected_elements:
            raise errors.BadIndexData(self)
        # keys are tuples
        key = tuple(elements[:self._key_length])
        absent, references, value = elements[-3:]
        value = value[:-1] # remove the newline
        ref_lists = []
        for ref_string in references.split('\t'):
            ref_lists.append(tuple([
                int(ref) for ref in ref_string.split('\r') if ref
                ]))
        ref_lists = tuple(ref_lists)
        self._keys_by_offset[pos] = (key, absent, ref_lists, value)
        pos += len(line)
    # second pass: every offset is now known, so references can be turned
    # back into keys.
    for key, absent, references, value in self._keys_by_offset.itervalues():
        if absent:
            # absent entries exist only as reference targets
            continue
        # resolve references:
        if self.node_ref_lists:
            node_refs = []
            for ref_list in references:
                node_refs.append(tuple(
                    [self._keys_by_offset[ref][0] for ref in ref_list]))
            node_value = (value, tuple(node_refs))
        else:
            node_value = value
        self._nodes[key] = node_value
        if self._key_length > 1:
            key_dict = self._nodes_by_key
            if self.node_ref_lists:
                key_value = key, node_value[0], node_value[1]
            else:
                key_value = key, node_value
            # possibly should do this on-demand, but it seems likely it is
            # always wanted
            # For a key of (foo, bar, baz) create
            # _nodes_by_key[foo][bar][baz] = key_value
            for subkey in key[:-1]:
                key_dict = key_dict.setdefault(subkey, {})
            key_dict[key[-1]] = key_value
    # cache the keys for quick set intersections
    self._keys = set(self._nodes)
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)
|
316 |
||
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
317 |
def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists).
        The former tuple is used when there are no reference lists in the
        index, making the API compatible with simple key:value index types.
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if not self.node_ref_lists:
        # plain key:value index - nodes map straight to their value
        for key, value in self._nodes.iteritems():
            yield self, key, value
        return
    # nodes map to (value, reference_lists) pairs
    for key, (value, reference_lists) in self._nodes.iteritems():
        yield self, key, value, reference_lists
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
337 |
|
2592.1.27
by Robert Collins
Test missing end lines with non-empty indices. |
338 |
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Consumes the signature and then three option lines, storing their
    integer values on self as node_ref_lists, _key_length and _key_count
    respectively.

    :param stream: A file-like object positioned at the start of the index.
    :raises BadIndexFormatSignature: if the signature does not match.
    :raises BadIndexOptions: if an option line has the wrong prefix or a
        value that is not an integer.
    """
    signature = stream.read(len(self._signature()))
    if signature != self._signature():
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    # (option prefix, attribute receiving the parsed value), in file order
    option_fields = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for option_prefix, attribute in option_fields:
        options_line = stream.readline()
        if not options_line.startswith(option_prefix):
            raise errors.BadIndexOptions(self)
        try:
            # drop the prefix and the trailing newline
            parsed = int(options_line[len(option_prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, parsed)
|
2592.1.5
by Robert Collins
Trivial index reading. |
363 |
|
364 |
def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    requested = set(keys)
    if requested:
        if self._nodes is None:
            self._buffer_all()
        # only keys actually present can be looked up
        present = requested.intersection(self._keys)
        with_references = self.node_ref_lists
        for key in present:
            if with_references:
                value, node_refs = self._nodes[key]
                yield self, key, value, node_refs
            else:
                yield self, key, self._nodes[key]
2592.1.7
by Robert Collins
A validate that goes boom. |
385 |
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
386 |
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned. Prefixes that match nothing are skipped silently.
    :raises errors.BadIndexKey: when a prefix starts with None or does not
        have the index's key length. As this is a generator the error is
        raised when iteration reaches the offending prefix.
    """
    keys = set(keys)
    if not keys:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    if self._key_length == 1:
        # Single element keys cannot carry a None wildcard, so each prefix
        # is simply an exact key lookup.
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                node = self._nodes[key]
            except KeyError:
                # a non-existent lookup: skip it, exactly as the
                # multi-element branch below does.
                continue
            if self.node_ref_lists:
                value, node_refs = node
                yield self, key, value, node_refs
            else:
                yield self, key, node
        return
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = self._nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while elements and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if elements:
            # Wildcard elements remain: walk every nested dict below the
            # matched prefix and yield the leaves.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                first_value = key_dict.itervalues().next()
                if isinstance(first_value, dict):
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
457 |
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
458 |
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.
    """
    if self._key_count is not None:
        return self._key_count
    # really this should just read the prefix
    self._buffer_all()
    return self._key_count
|
467 |
||
2592.1.8
by Robert Collins
Empty files should validate ok. |
468 |
def _signature(self):
    """The file signature for this index type.

    :return: The module-level _SIGNATURE constant.
    """
    return _SIGNATURE
|
471 |
||
2592.1.7
by Robert Collins
A validate that goes boom. |
472 |
def validate(self):
    """Validate that everything in the index can be accessed."""
    # iter_all_entries validates completely at the moment, so draining it
    # is all that is needed here.
    for _entry in self.iter_all_entries():
        continue
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
477 |
|
478 |
||
479 |
class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries against the combined index will be made against the first index,
    and then the second and so on. The order of indices can thus influence
    performance significantly. For example, if one index is on local disk and a
    second on a remote server, the local disk index should be before the other
    in the index list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data. The
            list object itself is kept (not copied), and insert_index
            modifies it in place.
        """
        self._indices = indices

    def __repr__(self):
        return "%s(%s)" % (
                self.__class__.__name__,
                ', '.join(map(repr, self._indices)))

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :return: An iterable of the nodes yielded by the child indices, i.e.
            (index, key, value, reference_lists) tuples where index is the
            child index the node came from. There is no defined order for
            the result iteration - it will be in the most efficient order
            for the index.
        """
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_all_entries():
                if node[1] not in seen_keys:
                    yield node
                    seen_keys.add(node[1])

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of the nodes yielded by the child indices, i.e.
            (index, key, value, reference_lists) tuples. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index.
        """
        keys = set(keys)
        for index in self._indices:
            if not keys:
                # Everything requested has been found already.
                return
            # Record what this child returned and prune afterwards: the
            # child may still be iterating over ``keys`` lazily, so the set
            # must not change size while its iterator is live.
            found = set()
            for node in index.iter_entries(keys):
                found.add(node[1])
                yield node
            keys.difference_update(found)

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        keys = set(keys)
        if not keys:
            return
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_entries_prefix(keys):
                if node[1] in seen_keys:
                    continue
                seen_keys.add(node[1])
                yield node

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys of
        the child indices. As child indices may have duplicate keys this can
        have a maximum error of the number of child indices * largest number of
        keys in any index.
        """
        return sum((index.key_count() for index in self._indices), 0)

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for index in self._indices:
            index.validate()
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
593 |
|
594 |
||
595 |
class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be immediately
    available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when the index has reference lists, or of (key, value) entries
            when it does not.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when the index has no reference lists.
            Nodes flagged as absent are not yielded. There is no defined
            order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash
            order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when the index has no reference lists.
            Nodes flagged as absent are not yielded. There is no defined
            order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this
            case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned. Prefixes that match nothing are skipped silently.
        :raises errors.BadIndexKey: when a prefix starts with None or does
            not have the index's key length. As this is a generator the
            error is raised when iteration reaches the offending prefix.
        """
        # XXX: Too much duplication with the GraphIndex class; consider finding
        # a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            # Single element keys cannot carry a None wildcard, so each
            # prefix is simply an exact key lookup.
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                try:
                    node = self._nodes[key]
                except KeyError:
                    # a non-existent lookup: skip it, exactly as the
                    # multi-element branch below does.
                    continue
                if node[0]:
                    # flagged as absent
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = self._nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while elements and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if elements:
                # Wildcard elements remain: walk every nested dict below
                # the matched prefix and yield the leaves.
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    first_value = key_dict.itervalues().next()
                    if isinstance(first_value, dict):
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                # the last thing looked up was a terminal element
                yield (self, ) + key_dict

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory indices have no known corruption at the moment."""
|
2624.2.12
by Robert Collins
Create an adapter between indices with differing key lengths. |
733 |
|
734 |
||
735 |
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: The key tuple prepended to every key and reference
            sent to adapted, and stripped from everything it returns.
        :param missing_key_length: How many None elements to append to
            prefix to form the prefix key used by iter_all_entries.
        :param add_nodes_callback: Optional callable that is given the list
            of prefixed nodes built by add_nodes.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,)*missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        The translated nodes are handed to add_nodes_callback as one list;
        a callback must therefore have been supplied to the constructor.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries for an index without reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        translated_nodes = []
        try:
            # Add prefix_key to each reference node_refs is a tuple of tuples,
            # so split it apart, and add prefix_key to the internal reference
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                        for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                    adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            # Unpacking failed, so the nodes carry no reference lists.
            # Rebuild from scratch so nothing from the failed attempt above
            # can leak into the result.
            translated_nodes = [(self.prefix + key, value)
                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of nodes from the adapted index, either
            (index, key, value, reference_lists) or (index, key, value).
        :return: A generator of the same nodes with the prefix removed from
            the key and from every referenced key.
        :raises errors.BadIndexData: if a key or a reference does not start
            with the prefix.
        """
        for node in an_iter:
            # cross checks
            if node[1][:self.prefix_len] != self.prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][self.prefix_len:]
            if len(node) < 4:
                # The adapted index has no reference lists, so there is
                # nothing further to check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it will be in
            the most efficient order for the index (in this case dictionary
            hash order).
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it will be in
            the most efficient order for the index (keys iteration order in
            this case).
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count while iterating rather than building a throwaway list.
        return sum(1 for _node in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()