bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
5752.3.8
by John Arbash Meinel
Merge bzr.dev 5764 to resolve release-notes (aka NEWS) conflicts |
1 |
# Copyright (C) 2007-2011 Canonical Ltd
|
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
16 |
|
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
17 |
"""Indexing facilities."""
|
18 |
||
|
6379.6.1
by Jelmer Vernooij
Import absolute_import in a few places. |
19 |
from __future__ import absolute_import |
20 |
||
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
21 |
__all__ = [ |
22 |
'CombinedGraphIndex', |
|
23 |
'GraphIndex', |
|
24 |
'GraphIndexBuilder', |
|
|
2624.2.12
by Robert Collins
Create an adapter between indices with differing key lengths. |
25 |
'GraphIndexPrefixAdapter', |
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
26 |
'InMemoryGraphIndex', |
27 |
]
|
|
|
2592.1.32
by Robert Collins
Add __all__ to index. |
28 |
|
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
29 |
from bisect import bisect_right |
|
2592.1.12
by Robert Collins
Handle basic node adds. |
30 |
import re |
|
3789.1.3
by John Arbash Meinel
CombinedGraphIndex can now reload when calling key_count(). |
31 |
import sys |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
32 |
|
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
33 |
from .lazy_import import lazy_import |
|
2624.2.15
by Robert Collins
Add useful -Dindex flag. |
34 |
lazy_import(globals(), """ |
|
6622.1.34
by Jelmer Vernooij
Rename brzlib => breezy. |
35 |
from breezy import (
|
|
5753.2.2
by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports. |
36 |
bisect_multi,
|
37 |
revision as _mod_revision,
|
|
38 |
trace,
|
|
39 |
)
|
|
|
2624.2.15
by Robert Collins
Add useful -Dindex flag. |
40 |
""") |
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
41 |
from . import ( |
|
3099.3.3
by John Arbash Meinel
Deprecate get_parents() in favor of get_parent_map() |
42 |
debug, |
43 |
errors, |
|
44 |
)
|
|
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
45 |
from .sixish import ( |
|
6621.22.2
by Martin
Use BytesIO or StringIO from bzrlib.sixish |
46 |
BytesIO, |
|
6654.1.1
by Martin
Factor out some copycode in iter_entries_prefix implementations |
47 |
viewvalues, |
|
6656.1.1
by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers |
48 |
viewitems, |
|
6621.22.2
by Martin
Use BytesIO or StringIO from bzrlib.sixish |
49 |
)
|
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
50 |
from .static_tuple import StaticTuple |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
51 |
|
|
2979.1.1
by Robert Collins
Use the GraphIndex header to answer key_count queries rather than parsing the entire index unnecessarily. |
52 |
_HEADER_READV = (0, 200) |
|
2624.2.8
by Robert Collins
Explicitly mark the number of keys elements in use in GraphIndex files. |
53 |
_OPTION_KEY_ELEMENTS = "key_elements=" |
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
54 |
_OPTION_LEN = "len=" |
|
2592.1.6
by Robert Collins
Record the number of node reference lists a particular index has. |
55 |
_OPTION_NODE_REFS = "node_ref_lists=" |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
56 |
_SIGNATURE = "Bazaar Graph Index 1\n" |
57 |
||
58 |
||
|
2592.1.14
by Robert Collins
Detect bad reference key values. |
59 |
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]') |
|
2592.1.12
by Robert Collins
Handle basic node adds. |
60 |
_newline_null_re = re.compile('[\n\0]') |
61 |
||
62 |
||
|
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
63 |
def _has_key_from_parent_map(self, key): |
64 |
"""Check if this index has one key. |
|
65 |
||
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
66 |
If it's possible to check for multiple keys at once through
|
|
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
67 |
calling get_parent_map that should be faster.
|
68 |
"""
|
|
69 |
return (key in self.get_parent_map([key])) |
|
70 |
||
|
3830.3.20
by John Arbash Meinel
Minor PEP8 and copyright updates. |
71 |
|
|
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
72 |
def _missing_keys_from_parent_map(self, keys): |
73 |
return set(keys) - set(self.get_parent_map(keys)) |
|
74 |
||
75 |
||
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
76 |
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
                        'key_elements=' DIGITS NEWLINE
                        'len=' DIGITS NEWLINE
      NODES          := NODE*
      NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
      KEY_ELEMENT    := Not-whitespace-utf8
      ABSENT         := 'a'
      REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                                ; referenced key.
      VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """
        self.reference_lists = reference_lists
        # A dict of {key: (absent, ref_lists, value)}
        self._nodes = {}
        # Keys that are referenced but not actually present in this index
        self._absent_keys = set()
        # Lazily built nested dict view of _nodes; see _get_nodes_by_key.
        self._nodes_by_key = None
        self._key_length = key_elements
        self._optimize_for_size = False
        self._combine_backing_indices = True

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        # Exact type check: only plain tuples and StaticTuples are accepted.
        if type(key) not in (tuple, StaticTuple):
            raise errors.BadIndexKey(key)
        if self._key_length != len(key):
            raise errors.BadIndexKey(key)
        for element in key:
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def _external_references(self):
        """Return references that are not present in this index.

        :return: A set of keys found in the second reference list of some
            node but not themselves yielded by iter_all_entries(). Always
            empty when the index has fewer than two reference lists.
        """
        keys = set()
        refs = set()
        # TODO: JAM 2008-11-21 This makes an assumption about how the reference
        #       lists are used. It is currently correct for pack-0.92 through
        #       1.9, which use the node references (3rd column) second
        #       reference list as the compression parent. Perhaps this should
        #       be moved into something higher up the stack, since it
        #       makes assumptions about how the index is used.
        if self.reference_lists > 1:
            for node in self.iter_all_entries():
                keys.add(node[1])
                refs.update(node[3][1])
            return refs - keys
        else:
            # If reference_lists == 0 there can be no external references, and
            # if reference_lists == 1, then there isn't a place to store the
            # compression parent
            return set()

    def _get_nodes_by_key(self):
        """Return the nested-dict view of the present nodes, building it once.

        For a key of (foo, bar, baz) the entry is stored at
        ``result[foo][bar][baz]`` as ``(key, value, references)`` when the
        index has reference lists, or ``(key, value)`` otherwise. Absent
        nodes are left out.
        """
        if self._nodes_by_key is None:
            nodes_by_key = {}
            with_refs = bool(self.reference_lists)
            for key, (absent, references, value) in viewitems(self._nodes):
                if absent:
                    continue
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                if with_refs:
                    key_dict[key[-1]] = key, value, references
                else:
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key

    def _update_nodes_by_key(self, key, value, node_refs):
        """Update the _nodes_by_key dict with a new key.

        For a key of (foo, bar, baz) create
        _nodes_by_key[foo][bar][baz] = key_value

        Does nothing if the _nodes_by_key view has not been built yet.
        """
        if self._nodes_by_key is None:
            return
        key_dict = self._nodes_by_key
        if self.reference_lists:
            key_value = StaticTuple(key, value, node_refs)
        else:
            key_value = StaticTuple(key, value)
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = key_value

    def _check_key_ref_value(self, key, references, value):
        """Check that 'key' and 'references' are all valid.

        :param key: A key tuple. Must conform to the key interface (be a tuple,
            be of the right length, not have any whitespace or nulls in any key
            element.)
        :param references: An iterable of reference lists. Something like
            [[(ref, key)], [(ref, key), (other, key)]]
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              iterables are tuples
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        :raises BadIndexKey: if the key or any new reference is invalid.
        :raises BadIndexValue: if the value contains a newline or null, or
            the number of reference lists does not match this index.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
            for reference in reference_list:
                # If reference *is* in self._nodes, then we know it has already
                # been checked.
                if reference not in self._nodes:
                    self._check_key(reference)
                    absent_references.append(reference)
            reference_list = as_st([as_st(ref).intern()
                                    for ref in reference_list])
            node_refs.append(reference_list)
        return as_st(node_refs), absent_references

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises BadIndexDuplicateKey: if the key was already added as a
            present (non-absent) node.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = ('a', (), '')
        self._absent_keys.update(absent_references)
        # The key may have been recorded as absent by an earlier reference.
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        This is a no-op, but we need the api to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        """Finish the index.

        :returns: A BytesIO holding the full content of the index as it
            should be written to disk.
        :raises BzrError: if the serialised length differs from the length
            predicted while computing reference offsets.
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # file cannot be determined until all the inbetween references have
        # been calculated too) we pad the offsets with 0's to make them be
        # of consistent length. Using binary offsets would break the trivial
        # file parsing.
        # to calculate the width of zero's needed we do three passes:
        # one to gather all the non-reference data and the number of references.
        # one to pad all the data with reference-length and determine entry
        # addresses.
        # One to serialise.

        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        nodes = sorted(viewitems(self._nodes))
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # record the offset known *so far* for this key:
                # the non reference bytes to date, and the total references to
                # date - saves reaccumulating on the second pass
                key_offset_info.append((key, non_ref_bytes, total_references))
                # key is literal, value is literal, there are 3 null's, 1 NL
                # key is variable length tuple, \x00 between elements
                non_ref_bytes += sum(len(element) for element in key)
                if self._key_length > 1:
                    non_ref_bytes += self._key_length - 1
                # value is literal bytes, there are 3 null's, 1 NL.
                non_ref_bytes += len(value) + 3 + 1
                # one byte for absent if set.
                if absent:
                    non_ref_bytes += 1
                else:
                    # reference_lists is known to be non-zero in this branch.
                    # (ref_lists -1) tabs
                    non_ref_bytes += self.reference_lists - 1
                    # (ref-1 cr's per ref_list)
                    for ref_list in references:
                        # how many references across the whole file?
                        total_references += len(ref_list)
                        # accrue reference separators
                        if ref_list:
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1  # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, non_ref_bytes, total_references in key_offset_info:
                key_addresses[key] = non_ref_bytes + total_references*digits
            # serialise
            format_string = '%%0%sd' % digits
        # Without reference lists every node's references are empty, so
        # format_string and key_addresses are never consulted below.
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(
                        format_string % key_addresses[reference])
                flattened_references.append('\r'.join(ref_addresses))
            string_key = '\x00'.join(key)
            lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        result = BytesIO(''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(result.getvalue()), expected_bytes))
        return result

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        :param for_size: Tell the builder to try and make the index as small as
            possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        :return: None
        """
        # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
        # other builders do.
        if for_size is not None:
            self._optimize_for_size = for_size
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()

        :param keys: The keys to start the ancestry walk from.
        :param ref_list_num: Which reference list of each entry holds the
            parent keys.
        :return: (parent_map, missing_keys) where parent_map maps every key
            found to its parent keys and missing_keys is the set of
            requested or referenced keys that iter_entries() did not yield.
        """
        pending = set(keys)
        parent_map = {}
        missing_keys = set()
        while pending:
            next_pending = set()
            for _, key, value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                parent_map[key] = parent_keys
                next_pending.update(p for p in parent_keys
                                    if p not in parent_map)
            # Only once every entry of this round has been seen can we tell
            # which of the requested keys were genuinely not found.
            missing_keys.update(pending.difference(parent_map))
            pending = next_pending
        return parent_map, missing_keys
|
378 |
||
|
2592.1.5
by Robert Collins
Trivial index reading. |
379 |
|
380 |
class GraphIndex(object): |
|
381 |
"""An index for data with embedded graphs. |
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
382 |
|
|
2592.1.10
by Robert Collins
Make validate detect node reference parsing errors. |
383 |
The index maps keys to a list of key reference lists, and a value.
|
384 |
Each node has the same number of key reference lists. Each key reference
|
|
385 |
list can be empty or an arbitrary length. The value is an opaque NULL
|
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
386 |
terminated string without any newlines. The storage of the index is
|
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
387 |
hidden in the interface: keys and key references are always tuples of
|
388 |
bytestrings, never the internal representation (e.g. dictionary offsets).
|
|
|
2592.1.30
by Robert Collins
Absent entries are not yeilded. |
389 |
|
390 |
It is presumed that the index will not be mutated - it is static data.
|
|
|
2592.1.34
by Robert Collins
Cleanup docs. |
391 |
|
|
2592.1.44
by Robert Collins
Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review. |
392 |
Successive iter_all_entries calls will read the entire index each time.
|
393 |
Additionally, iter_entries calls will read the index linearly until the
|
|
394 |
desired keys are found. XXX: This must be fixed before the index is
|
|
|
2592.1.34
by Robert Collins
Cleanup docs. |
395 |
suitable for production use. :XXX
|
|
2592.1.5
by Robert Collins
Trivial index reading. |
396 |
"""
|
397 |
||
|
5074.4.2
by John Arbash Meinel
Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. |
398 |
def __init__(self, transport, name, size, unlimited_cache=False, offset=0): |
|
2592.1.5
by Robert Collins
Trivial index reading. |
399 |
"""Open an index called name on transport. |
400 |
||
|
6622.1.34
by Jelmer Vernooij
Rename brzlib => breezy. |
401 |
:param transport: A breezy.transport.Transport.
|
|
2592.1.5
by Robert Collins
Trivial index reading. |
402 |
:param name: A path to provide to transport API calls.
|
|
2890.2.1
by Robert Collins
* ``bzrlib.index.GraphIndex`` now requires a size parameter to the |
403 |
:param size: The size of the index in bytes. This is used for bisection
|
404 |
logic to perform partial index reads. While the size could be
|
|
405 |
obtained by statting the file this introduced an additional round
|
|
|
2890.2.8
by Robert Collins
Make the size of the index optionally None for the pack-names index. |
406 |
trip as well as requiring stat'able transports, both of which are
|
407 |
avoided by having it supplied. If size is None, then bisection
|
|
408 |
support will be disabled and accessing the index will just stream
|
|
409 |
all the data.
|
|
|
5074.4.2
by John Arbash Meinel
Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. |
410 |
:param offset: Instead of starting the index data at offset 0, start it
|
411 |
at an arbitrary offset.
|
|
|
2592.1.5
by Robert Collins
Trivial index reading. |
412 |
"""
|
413 |
self._transport = transport |
|
414 |
self._name = name |
|
|
2890.2.16
by Robert Collins
Review feedback. |
415 |
# Becomes a dict of key:(value, reference-list-byte-locations) used by
|
416 |
# the bisection interface to store parsed but not resolved keys.
|
|
|
2890.2.6
by Robert Collins
Add support for key references to the index lookup_keys_via_location bisection interface. |
417 |
self._bisect_nodes = None |
|
2890.2.16
by Robert Collins
Review feedback. |
418 |
# Becomes a dict of key:(value, reference-list-keys) which are ready to
|
419 |
# be returned directly to callers.
|
|
|
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
420 |
self._nodes = None |
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
421 |
# a sorted list of slice-addresses for the parsed bytes of the file.
|
422 |
# e.g. (0,1) would mean that byte 0 is parsed.
|
|
|
2890.2.2
by Robert Collins
Opening an index creates a map for the parsed bytes. |
423 |
self._parsed_byte_map = [] |
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
424 |
# a sorted list of keys matching each slice address for parsed bytes
|
425 |
# e.g. (None, 'foo@bar') would mean that the first byte contained no
|
|
426 |
# key, and the end byte of the slice is the of the data for 'foo@bar'
|
|
427 |
self._parsed_key_map = [] |
|
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
428 |
self._key_count = None |
|
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
429 |
self._keys_by_offset = None |
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
430 |
self._nodes_by_key = None |
|
2890.2.1
by Robert Collins
* ``bzrlib.index.GraphIndex`` now requires a size parameter to the |
431 |
self._size = size |
|
3665.3.3
by John Arbash Meinel
If we read more than 50% of the whole index, |
432 |
# The number of bytes we've read so far in trying to process this file
|
433 |
self._bytes_read = 0 |
|
|
5074.4.2
by John Arbash Meinel
Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. |
434 |
self._base_offset = offset |
|
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
435 |
|
|
2592.3.176
by Robert Collins
Various pack refactorings. |
436 |
def __eq__(self, other): |
|
2592.3.215
by Robert Collins
Review feedback. |
437 |
"""Equal when self and other were created with the same parameters.""" |
|
2592.3.176
by Robert Collins
Various pack refactorings. |
438 |
return ( |
|
6619.3.18
by Jelmer Vernooij
Run 2to3 idioms fixer. |
439 |
isinstance(self, type(other)) and |
|
2592.3.176
by Robert Collins
Various pack refactorings. |
440 |
self._transport == other._transport and |
441 |
self._name == other._name and |
|
442 |
self._size == other._size) |
|
443 |
||
444 |
def __ne__(self, other): |
|
445 |
return not self.__eq__(other) |
|
446 |
||
|
3517.4.13
by Martin Pool
Add repr methods |
447 |
def __repr__(self): |
448 |
return "%s(%r)" % (self.__class__.__name__, |
|
449 |
self._transport.abspath(self._name)) |
|
450 |
||
|
3665.3.1
by John Arbash Meinel
Updates to GraphIndex processing. |
451 |
def _buffer_all(self, stream=None):
    """Buffer all the index data.

    Mutates self._nodes and self._keys_by_offset.

    Does nothing (and does not touch stream) when the index has already
    been buffered.

    :param stream: Optional file-like object to read the index from. When
        None the data is fetched with self._transport.get(self._name) and,
        for a non-zero base offset, the leading bytes are discarded. Once
        reading starts the stream is closed, even if reading fails.
    :raises errors.BadIndexData: if the parsed data does not contain
        exactly one trailer line.
    """
    if self._nodes is not None:
        # We already did this
        return
    if 'index' in debug.debug_flags:
        trace.mutter('Reading entire index %s',
                     self._transport.abspath(self._name))
    if stream is None:
        stream = self._transport.get(self._name)
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            raw_stream = stream
            try:
                stream = BytesIO(raw_stream.read()[self._base_offset:])
            finally:
                # The transport stream is replaced by the in-memory copy,
                # so it has to be released here or it is never closed.
                raw_stream.close()
    try:
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        pos = stream.tell()
        lines = stream.read().split('\n')
    finally:
        # Close the stream whether or not the header/data could be read.
        stream.close()
    # The text after the final newline is not a complete line.
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in viewvalues(self._keys_by_offset):
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)
|
496 |
||
|
4744.2.6
by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member. |
497 |
def clear_cache(self):
    """Clear out any cached/memoized values.

    Callers may invoke this at any point; the typical use is after some
    information has been extracted and no further requests against this
    index are expected.

    This implementation has no body beyond this docstring, so nothing is
    actually dropped.
    """
|
|
504 |
||
|
4011.5.11
by Robert Collins
Polish the KnitVersionedFiles.scan_unvalidated_index api. |
505 |
def external_references(self, ref_list_num):
    """Return references that are not present in this index.

    The whole index is buffered first.

    :param ref_list_num: Which of each node's reference lists to inspect.
    :return: A set of the referenced keys that are not keys of this index.
    :raises ValueError: if ref_list_num + 1 exceeds the number of
        reference lists in the index.
    """
    self._buffer_all()
    if ref_list_num + 1 > self.node_ref_lists:
        raise ValueError('No ref list %d, index has %d ref lists'
                         % (ref_list_num, self.node_ref_lists))
    known = self._nodes
    external = set()
    for _key, (_value, ref_lists) in viewitems(known):
        external.update(
            ref for ref in ref_lists[ref_list_num] if ref not in known)
    return external
|
|
4011.5.2
by Andrew Bennetts
Add more tests, improve existing tests, add GraphIndex._external_references() |
518 |
|
|
3711.3.21
by John Arbash Meinel
Fix GraphIndex to properly generate _nodes_by_keys on demand. |
519 |
def _get_nodes_by_key(self):
    """Return the nested-dict view of self._nodes, building it on demand.

    Each key tuple is turned into a path of nested dicts, one level per
    key element; the leaf stored under the final element is
    (key, value, references) when the index has reference lists and
    (key, value) otherwise. The result is cached in self._nodes_by_key.
    """
    if self._nodes_by_key is not None:
        return self._nodes_by_key
    tree = {}
    has_refs = self.node_ref_lists
    for key, node in viewitems(self._nodes):
        # Walk (creating as needed) one dict level per leading key element.
        level = tree
        for element in key[:-1]:
            level = level.setdefault(element, {})
        if has_refs:
            value, references = node
            level[key[-1]] = key, value, references
        else:
            level[key[-1]] = key, node
    self._nodes_by_key = tree
    return self._nodes_by_key
|
536 |
||
|
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
537 |
def iter_all_entries(self):
    """Iterate over all keys within the index.

    The index is buffered in full on first use.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists). The former tuple is used
        when there are no reference lists in the index, making the API
        compatible with simple key:value index types. There is no defined
        order for the result iteration - it will be in the most efficient
        order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if not self.node_ref_lists:
        # Plain key:value index - the stored node is the value itself.
        for key, value in viewitems(self._nodes):
            yield self, key, value
        return
    for key, (value, node_ref_lists) in viewitems(self._nodes):
        yield self, key, value, node_ref_lists
|
2624.2.2
by Robert Collins
Temporary performance hack for GraphIndex : load the entire index once and only once into ram. |
557 |
|
|
2592.1.27
by Robert Collins
Test missing end lines with non-empty indices. |
558 |
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Consumes the signature followed by three option lines and stores the
    integers they carry in self.node_ref_lists, self._key_length and
    self._key_count respectively.

    :param stream: A file-like object positioned at the start of the
        index data.
    :raises errors.BadIndexFormatSignature: if the signature differs from
        self._signature().
    :raises errors.BadIndexOptions: if an option line lacks its expected
        prefix or its value is not an integer.
    """
    signature = stream.read(len(self._signature()))
    if not signature == self._signature():
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    # The option lines appear in a fixed order; each is
    # "<prefix><integer><one trailing character>".
    header_fields = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for prefix, attribute in header_fields:
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            parsed = int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, parsed)
|
|
2592.1.5
by Robert Collins
Trivial index reading. |
583 |
|
|
2890.2.6
by Robert Collins
Add support for key references to the index lookup_keys_via_location bisection interface. |
584 |
def _resolve_references(self, references): |
|
2890.2.16
by Robert Collins
Review feedback. |
585 |
"""Return the resolved key references for references. |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
586 |
|
|
2890.2.16
by Robert Collins
Review feedback. |
587 |
References are resolved by looking up the location of the key in the
|
588 |
_keys_by_offset map and substituting the key name, preserving ordering.
|
|
589 |
||
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
590 |
:param references: An iterable of iterables of key locations. e.g.
|
|
2890.2.16
by Robert Collins
Review feedback. |
591 |
[[123, 456], [123]]
|
592 |
:return: A tuple of tuples of keys.
|
|
593 |
"""
|
|
|
2890.2.6
by Robert Collins
Add support for key references to the index lookup_keys_via_location bisection interface. |
594 |
node_refs = [] |
595 |
for ref_list in references: |
|
596 |
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list])) |
|
597 |
return tuple(node_refs) |
|
598 |
||
|
2890.2.11
by Robert Collins
Bisection improvements after integrating with packs. |
599 |
def _find_index(self, range_map, key): |
600 |
"""Helper for the _parsed_*_index calls. |
|
601 |
||
602 |
Given a range map - [(start, end), ...], finds the index of the range
|
|
603 |
in the map for key if it is in the map, and if it is not there, the
|
|
604 |
immediately preceeding range in the map.
|
|
605 |
"""
|
|
606 |
result = bisect_right(range_map, key) - 1 |
|
607 |
if result + 1 < len(range_map): |
|
608 |
# check the border condition, it may be in result + 1
|
|
609 |
if range_map[result + 1][0] == key[0]: |
|
610 |
return result + 1 |
|
611 |
return result |
|
612 |
||
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
613 |
def _parsed_byte_index(self, offset): |
614 |
"""Return the index of the entry immediately before offset. |
|
615 |
||
616 |
e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
|
|
617 |
there is one unparsed byte (the 11th, addressed as[10]). then:
|
|
618 |
asking for 0 will return 0
|
|
619 |
asking for 10 will return 0
|
|
620 |
asking for 11 will return 1
|
|
621 |
asking for 12 will return 1
|
|
622 |
"""
|
|
623 |
key = (offset, 0) |
|
|
2890.2.11
by Robert Collins
Bisection improvements after integrating with packs. |
624 |
return self._find_index(self._parsed_byte_map, key) |
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
625 |
|
626 |
def _parsed_key_index(self, key): |
|
627 |
"""Return the index of the entry immediately before key. |
|
628 |
||
629 |
e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
|
|
630 |
meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
|
|
631 |
have been parsed, then:
|
|
632 |
asking for '' will return 0
|
|
633 |
asking for 'a' will return 0
|
|
634 |
asking for 'b' will return 1
|
|
635 |
asking for 'e' will return 1
|
|
636 |
"""
|
|
|
2890.2.11
by Robert Collins
Bisection improvements after integrating with packs. |
637 |
search_key = (key, None) |
638 |
return self._find_index(self._parsed_key_map, search_key) |
|
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
639 |
|
640 |
def _is_parsed(self, offset): |
|
641 |
"""Returns True if offset has been parsed.""" |
|
642 |
index = self._parsed_byte_index(offset) |
|
643 |
if index == len(self._parsed_byte_map): |
|
644 |
return offset < self._parsed_byte_map[index - 1][1] |
|
645 |
start, end = self._parsed_byte_map[index] |
|
646 |
return offset >= start and offset < end |
|
647 |
||
|
2890.2.7
by Robert Collins
* Pack indices are now partially parsed for specific key lookup using a |
648 |
def _iter_entries_from_total_buffer(self, keys): |
649 |
"""Iterate over keys when the entire index is parsed.""" |
|
|
4789.28.2
by John Arbash Meinel
Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. |
650 |
# Note: See the note in BTreeBuilder.iter_entries for why we don't use
|
651 |
# .intersection() here
|
|
652 |
nodes = self._nodes |
|
653 |
keys = [key for key in keys if key in nodes] |
|
|
2624.2.3
by Robert Collins
Make GraphIndex.iter_entries do hash lookups rather than table scans. |
654 |
if self.node_ref_lists: |
655 |
for key in keys: |
|
|
4789.28.2
by John Arbash Meinel
Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. |
656 |
value, node_refs = nodes[key] |
|
2624.2.14
by Robert Collins
Add source index to the index iteration API to allow mapping back to the origin of retrieved data. |
657 |
yield self, key, value, node_refs |
|
2624.2.3
by Robert Collins
Make GraphIndex.iter_entries do hash lookups rather than table scans. |
658 |
else: |
659 |
for key in keys: |
|
|
4789.28.2
by John Arbash Meinel
Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. |
660 |
yield self, key, nodes[key] |
|
2592.1.7
by Robert Collins
A validate that goes boom. |
661 |
|
|
2890.2.7
by Robert Collins
* Pack indices are now partially parsed for specific key lookup using a |
662 |
def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    wanted = set(keys)
    if not wanted:
        return []
    # Without a known size bisection is impossible, so read everything.
    if self._size is None and self._nodes is None:
        self._buffer_all()

    # We fit about 20 keys per minimum-read (4K), so if we are looking for
    # more than 1/20th of the index its likely (assuming homogenous key
    # spread) that we'll read the entire index. If we're going to do that,
    # buffer the whole thing. A better analysis might take key spread into
    # account - but B+Tree indices are better anyway.
    # We could look at all data read, and use a threshold there, which will
    # trigger on ancestry walks, but that is not yet fully mapped out.
    if self._nodes is None and len(wanted) * 20 > self.key_count():
        self._buffer_all()

    if self._nodes is None:
        # Still not buffered: answer by bisecting into the file.
        bisected = bisect_multi.bisect_multi_bytes(
            self._lookup_keys_via_location, self._size, wanted)
        return (result[1] for result in bisected)
    return self._iter_entries_from_total_buffer(wanted)
|
2890.2.7
by Robert Collins
* Pack indices are now partially parsed for specific key lookup using a |
690 |
|
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
691 |
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = set(keys)
    if not prefixes:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    if self._key_length != 1:
        # Multi-element keys: delegate the matching to the shared helper,
        # working from the nested-dict view of the nodes.
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, prefixes):
            yield entry
        return
    # Single-element keys: each prefix is looked up directly in _nodes.
    for key in prefixes:
        _sanity_check_key(self, key)
        if self.node_ref_lists:
            value, node_refs = self._nodes[key]
            yield self, key, value, node_refs
        else:
            yield self, key, self._nodes[key]
|
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
731 |
|
|
4593.4.12
by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() |
732 |
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys): |
733 |
"""See BTreeIndex._find_ancestors.""" |
|
|
4593.4.7
by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex. |
734 |
# The api can be implemented as a trivial overlay on top of
|
735 |
# iter_entries, it is not an efficient implementation, but it at least
|
|
736 |
# gets the job done.
|
|
737 |
found_keys = set() |
|
738 |
search_keys = set() |
|
739 |
for index, key, value, refs in self.iter_entries(keys): |
|
740 |
parent_keys = refs[ref_list_num] |
|
741 |
found_keys.add(key) |
|
742 |
parent_map[key] = parent_keys |
|
743 |
search_keys.update(parent_keys) |
|
744 |
# Figure out what, if anything, was missing
|
|
745 |
missing_keys.update(set(keys).difference(found_keys)) |
|
746 |
search_keys = search_keys.difference(parent_map) |
|
747 |
return search_keys |
|
748 |
||
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
749 |
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.

    When the count is not yet known, the header range is read and parsed
    first to obtain it.
    """
    header_unread = self._key_count is None
    if header_unread:
        self._read_and_parse([_HEADER_READV])
    return self._key_count
757 |
||
|
2890.2.18
by Robert Collins
Review feedback. |
758 |
def _lookup_keys_via_location(self, location_keys):
    """Answer bisection probes for keys at the given byte locations.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    Each answer in the returned list is one of:

    * ``False`` - the region the key would be in has been parsed and the
      key is not there (or, after a full buffer, it is not in _nodes);
    * ``-1`` - the key sorts before the first key of the parsed range
      around the probed location;
    * ``+1`` - otherwise, i.e. the key is not below that range;
    * ``(self, key, value, refs)`` when node_ref_lists is set, or
      ``(self, key, value)`` when it is not - the key was found.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        breezy.bisect_multi.bisect_multi_bytes.
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # this progresses in three parts:
    # read data
    # parse it
    # attempt to answer the question from the now in memory data.
    # build the readv request
    # for each location, ask for 800 bytes - much more than rows we've seen
    # anywhere.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key has been parsed, so no lookup is needed even if its
            # not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        length = 800
        # never request bytes past the end of the index
        if location + length > self._size:
            length = self._size - location
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append(_HEADER_READV)
    self._read_and_parse(readv_ranges)
    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range
        for location, key in location_keys:
            if key not in self._nodes: # not present
                result.append(((location, key), False))
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                result.append(((location, key),
                    (self, key, value, refs)))
            else:
                result.append(((location, key),
                    (self, key, self._nodes[key])))
        return result
    # generate results:
    #  - figure out <, >, missing, present
    #  - result present references so we can return them.
    # keys that we cannot answer until we resolve references
    pending_references = []
    pending_locations = set()
    for location, key in location_keys:
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if self.node_ref_lists:
                # the references may not have been all parsed.
                value, refs = self._bisect_nodes[key]
                wanted_locations = []
                for ref_list in refs:
                    for ref in ref_list:
                        if ref not in self._keys_by_offset:
                            wanted_locations.append(ref)
                if wanted_locations:
                    # defer this key: it is answered after the second
                    # read below has parsed the referenced offsets.
                    pending_locations.update(wanted_locations)
                    pending_references.append((location, key))
                    continue
                result.append(((location, key), (self, key,
                    value, self._resolve_references(refs))))
            else:
                result.append(((location, key),
                    (self, key, self._bisect_nodes[key])))
            continue
        else:
            # has the region the key should be in, been parsed?
            index = self._parsed_key_index(key)
            if (self._parsed_key_map[index][0] <= key and
                (self._parsed_key_map[index][1] >= key or
                 # end of the file has been parsed
                 self._parsed_byte_map[index][1] == self._size)):
                result.append(((location, key), False))
                continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append(((location, key), direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = 800
        # never request bytes past the end of the index
        if location + length > self._size:
            length = self._size - location
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in pending_references:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result
899 |
||
900 |
def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Stores the three integer header options on the instance
    (node_ref_lists, _key_length and _key_count), marks the header bytes
    as parsed and resets the bisection parsing state (_expected_elements,
    _keys_by_offset and _bisect_nodes).

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: If the data does not start with the
        signature of this index type.
    :raises BadIndexOptions: If an option line is missing, does not start
        with the expected prefix, or does not hold an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    lines = bytes[len(expected_signature):].splitlines()
    # (option prefix, attribute receiving its integer value), in the order
    # the option lines appear in the header.
    option_specs = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    if len(lines) < len(option_specs):
        # A truncated header is a malformed index, not an IndexError.
        raise errors.BadIndexOptions(self)
    # calculate the bytes we have processed
    header_end = len(signature)
    for (prefix, attribute), options_line in zip(option_specs, lines):
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            setattr(self, attribute, int(options_line[len(prefix):]))
        except ValueError:
            raise errors.BadIndexOptions(self)
        # +1 per option line, as the original arithmetic did; presumably
        # the one-byte line terminator that splitlines() removed.
        header_end += len(options_line) + 1
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]
943 |
||
944 |
def _parse_region(self, offset, data):
    """Parse node data returned from a readv operation.

    The data is handed to _parse_segment one segment at a time until
    either the remaining data lies inside an already parsed range or
    _parse_segment reports that it handled the last segment.

    :param offset: The byte offset the data starts at.
    :param data: The data to parse.
    """
    region_end = offset + len(data)
    # How far into the region parsing has progressed so far.
    progress = offset
    finished = False
    while not finished:
        map_index = self._parsed_byte_index(progress)
        if region_end < self._parsed_byte_map[map_index][1]:
            # The end of the data is within the low-matching parsed range:
            # there is nothing new left to parse.
            break
        progress, finished = self._parse_segment(
            offset, data, region_end, map_index)
|
966 |
||
967 |
def _parse_segment(self, offset, data, end, index):
    """Parse one segment of data.

    The data is trimmed so that only bytes outside the neighbouring parsed
    ranges, and only whole lines, are parsed. The parsed nodes are stored
    in _bisect_nodes and the byte range is recorded via _parsed_bytes.

    :param offset: Where 'data' begins in the file.
    :param data: Some data to parse a segment of.
    :param end: Where data ends
    :param index: The current index into the parsed bytes map.
    :return: high_parsed_byte, last_segment.
        high_parsed_byte is the location of the highest parsed byte in this
        segment, last_segment is True if the parsed segment is the last
        possible one in the data block.
    :raises AssertionError: If a newline needed for trimming is absent, or
        if nothing is left to parse after trimming.
    """
    # default is to use all data
    trim_end = None
    # accommodate overlap with data before this.
    if offset < self._parsed_byte_map[index][1]:
        # overlaps the lower parsed region
        # skip the parsed data
        trim_start = self._parsed_byte_map[index][1] - offset
        # don't trim the start for \n
        start_adjacent = True
    elif offset == self._parsed_byte_map[index][1]:
        # abuts the lower parsed region
        # use all data
        trim_start = None
        # do not trim anything
        start_adjacent = True
    else:
        # does not overlap the lower parsed region
        # use all data
        trim_start = None
        # but trim the leading \n
        start_adjacent = False
    if end == self._size:
        # lines up to the end of all data:
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif index + 1 == len(self._parsed_byte_map):
        # at the end of the parsed data
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    elif end == self._parsed_byte_map[index + 1][0]:
        # buts up against the next parsed region
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif end > self._parsed_byte_map[index + 1][0]:
        # overlaps into the next parsed region
        # only consider the unparsed data
        trim_end = self._parsed_byte_map[index + 1][0] - offset
        # do not strip to the last \n as we know its an entire record
        end_adjacent = True
        last_segment = end < self._parsed_byte_map[index + 1][1]
    else:
        # does not overlap into the next region
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    # now find bytes to discard if needed
    if not start_adjacent:
        # work around python bug in rfind
        if trim_start is None:
            trim_start = data.find('\n') + 1
        else:
            trim_start = data.find('\n', trim_start) + 1
        # find() returned -1, so there is no line boundary to start from
        if trim_start == 0:
            raise AssertionError('no \n was present')
    if not end_adjacent:
        # work around python bug in rfind
        if trim_end is None:
            trim_end = data.rfind('\n') + 1
        else:
            trim_end = data.rfind('\n', None, trim_end) + 1
        # rfind() returned -1, so there is no complete line to stop at
        if trim_end == 0:
            raise AssertionError('no \n was present')
    # adjust offset and data to the parseable data.
    trimmed_data = data[trim_start:trim_end]
    if not trimmed_data:
        # trim_start and trim_end may still be None here, so they must be
        # formatted with %s: %d would raise TypeError and hide this error.
        raise AssertionError('read unneeded data [%s:%s] from [%d:%d]'
            % (trim_start, trim_end, offset, offset + len(data)))
    if trim_start:
        offset += trim_start
    # splitlines mangles the \r delimiters.. don't use it.
    lines = trimmed_data.split('\n')
    # drop the piece that follows the final \n
    del lines[-1]
    pos = offset
    first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
    for key, value in nodes:
        self._bisect_nodes[key] = value
    self._parsed_bytes(offset, first_key,
        offset + len(trimmed_data), last_key)
    return offset + len(trimmed_data), last_segment
|
1074 |
||
1075 |
def _parse_lines(self, lines, pos):
    """Parse index lines, recording every record by its byte offset.

    Each non-empty line is split on NUL into the key elements followed by
    the absent marker, the references and the value. Every record is
    stored in _keys_by_offset under the offset it starts at; records whose
    absent marker is empty are also collected as nodes.

    :param lines: The lines to parse, without their trailing newlines.
    :param pos: The byte offset at which the first line starts.
    :return: first_key, last_key, nodes, trailers. nodes is a list of
        (key, node_value) pairs, where node_value is (value, ref_lists)
        when node_ref_lists is set and plain value otherwise. trailers is
        the number of empty lines seen.
    :raises BadIndexData: If a line does not split into the expected
        number of elements.
    :raises AssertionError: If an empty line is seen somewhere other than
        one byte before the known size of the index.
    """
    last_key = None
    first_key = None
    trailer_count = 0
    parsed_nodes = []
    for line in lines:
        if line == '':
            # must be at the end
            if self._size and self._size != pos + 1:
                raise AssertionError("%s %s" % (self._size, pos))
            trailer_count += 1
            continue
        fields = line.split('\0')
        if len(fields) != self._expected_elements:
            raise errors.BadIndexData(self)
        # keys are tuples. Each element is a string that may occur many
        # times, so we intern them to save space. AB, RC, 200807
        last_key = tuple(
            [intern(part) for part in fields[:self._key_length]])
        if first_key is None:
            first_key = last_key
        absent, references, value = fields[-3:]
        # tab separates the reference lists, \r the offsets in one list
        ref_lists = tuple([
            tuple([int(ref) for ref in ref_string.split('\r') if ref])
            for ref_string in references.split('\t')])
        self._keys_by_offset[pos] = (last_key, absent, ref_lists, value)
        pos += len(line) + 1  # +1 for the \n
        if not absent:
            if self.node_ref_lists:
                parsed_nodes.append((last_key, (value, ref_lists)))
            else:
                parsed_nodes.append((last_key, value))
    return first_key, last_key, parsed_nodes, trailer_count
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
1114 |
|
1115 |
def _parsed_bytes(self, start, start_key, end, end_key):
    """Mark the bytes from start to end as parsed.

    Calling self._parsed_bytes(1, start_key, 2, end_key) will mark one
    byte (the one at offset 1) as parsed.

    The byte range goes into _parsed_byte_map and the matching key range
    into _parsed_key_map, at the same position in both lists.

    :param start: The start of the parsed region.
    :param start_key: The first key in the parsed region.
    :param end: The end of the parsed region.
    :param end_key: The last key in the parsed region.
    """
    index = self._parsed_byte_index(start)
    byte_map = self._parsed_byte_map
    key_map = self._parsed_key_map
    if index == -1:
        # first range parsed is always the beginning.
        byte_map.insert(index, (start, end))
        key_map.insert(index, (start_key, end_key))
        return
    upper = index + 1
    # Does the new range start exactly where the lower range stops?
    joins_lower = byte_map[index][1] == start
    # Does the new range stop exactly where the next range starts?
    joins_upper = upper < len(byte_map) and byte_map[upper][0] == end
    if joins_lower and joins_upper:
        # combine two regions
        byte_map[index] = (byte_map[index][0], byte_map[upper][1])
        key_map[index] = (key_map[index][0], key_map[upper][1])
        del byte_map[upper]
        del key_map[upper]
    elif joins_lower:
        # extend the lower entry
        byte_map[index] = (byte_map[index][0], end)
        key_map[index] = (key_map[index][0], end_key)
    elif joins_upper:
        # extend the higher entry
        byte_map[upper] = (start, byte_map[upper][1])
        key_map[upper] = (start_key, key_map[upper][1])
    else:
        # new entry
        byte_map.insert(upper, (start, end))
        key_map.insert(upper, (start_key, end_key))
|
|
2890.2.5
by Robert Collins
Create a content lookup function for bisection in GraphIndex. |
1164 |
|
|
2890.2.6
by Robert Collins
Add support for key references to the index lookup_keys_via_location bisection interface. |
1165 |
def _read_and_parse(self, readv_ranges):
    """Read the ranges and parse the resulting data.

    Nothing happens for an empty range list. When no full node map exists
    yet and at least half of the index has already been read, the whole
    index is buffered instead of issuing another partial read.

    :param readv_ranges: A prepared readv range list.
    """
    if not readv_ranges:
        return
    if self._nodes is None and self._bytes_read * 2 >= self._size:
        # We've already read more than 50% of the file and we are about to
        # request more data, just _buffer_all() and be done
        self._buffer_all()
        return

    shift = self._base_offset
    if shift != 0:
        # Rewrite the ranges for the offset
        readv_ranges = [(begin + shift, length)
                        for begin, length in readv_ranges]
    chunks = self._transport.readv(self._name, readv_ranges, True,
        self._size + shift)
    # parse
    for raw_offset, data in chunks:
        # translate back into an offset relative to this index
        offset = raw_offset - shift
        self._bytes_read += len(data)
        if offset < 0:
            # transport.readv() expanded to extra data which isn't part of
            # this index
            data = data[-offset:]
            offset = 0
        if offset == 0 and len(data) == self._size:
            # We read the whole range, most likely because the
            # Transport upcast our readv ranges into one long request
            # for enough total data to grab the whole index.
            self._buffer_all(BytesIO(data))
            return
        if self._bisect_nodes is None:
            # this must be the start
            if offset != 0:
                raise AssertionError()
            offset, data = self._parse_header_from_bytes(data)
        self._parse_region(offset, data)
|
|
2890.2.6
by Robert Collins
Add support for key references to the index lookup_keys_via_location bisection interface. |
1207 |
|
|
2592.1.8
by Robert Collins
Empty files should validate ok. |
1208 |
def _signature(self):
    """The file signature for this index type.

    :return: The module-level _SIGNATURE constant.
    """
    return _SIGNATURE
|
1211 |
||
|
2592.1.7
by Robert Collins
A validate that goes boom. |
1212 |
def validate(self):
    """Validate that everything in the index can be accessed.

    Every entry is iterated and discarded; any error raised while
    iterating propagates to the caller.
    """
    # iter_all validates completely at the moment, so just do that.
    entries = self.iter_all_entries()
    for _entry in entries:
        continue
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1217 |
|
1218 |
||
1219 |
class CombinedGraphIndex(object): |
|
1220 |
"""A GraphIndex made up from smaller GraphIndices. |
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
1221 |
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1222 |
The backing indices must implement GraphIndex, and are presumed to be
|
1223 |
static data.
|
|
|
2592.1.45
by Robert Collins
Tweak documentation as per Aaron's review. |
1224 |
|
1225 |
Queries against the combined index will be made against the first index,
|
|
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1226 |
and then the second and so on. The order of indices can thus influence
|
|
2592.1.45
by Robert Collins
Tweak documentation as per Aaron's review. |
1227 |
performance significantly. For example, if one index is on local disk and a
|
1228 |
second on a remote server, the local disk index should be before the other
|
|
1229 |
in the index list.
|
|
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1230 |
|
1231 |
Also, queries tend to need results from the same indices as previous
|
|
1232 |
queries. So the indices will be reordered after every query to put the
|
|
1233 |
indices that had the result(s) of that query first (while otherwise
|
|
1234 |
preserving the relative ordering).
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1235 |
"""
|
1236 |
||
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1237 |
def __init__(self, indices, reload_func=None): |
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1238 |
"""Create a CombinedGraphIndex backed by indices. |
1239 |
||
|
2592.1.45
by Robert Collins
Tweak documentation as per Aaron's review. |
1240 |
:param indices: An ordered list of indices to query for data.
|
|
3789.1.3
by John Arbash Meinel
CombinedGraphIndex can now reload when calling key_count(). |
1241 |
:param reload_func: A function to call if we find we are missing an
|
1242 |
index. Should have the form reload_func() => True/False to indicate
|
|
1243 |
if reloading actually changed anything.
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1244 |
"""
|
1245 |
self._indices = indices |
|
|
3789.1.3
by John Arbash Meinel
CombinedGraphIndex can now reload when calling key_count(). |
1246 |
self._reload_func = reload_func |
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1247 |
# Sibling indices are other CombinedGraphIndex that we should call
|
1248 |
# _move_to_front_by_name on when we auto-reorder ourself.
|
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1249 |
self._sibling_indices = [] |
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1250 |
# A list of names that corresponds to the instances in self._indices,
|
1251 |
# so _index_names[0] is always the name for _indices[0], etc. Sibling
|
|
1252 |
# indices must all use the same set of names as each other.
|
|
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1253 |
self._index_names = [None] * len(self._indices) |
|
2592.1.37
by Robert Collins
Add CombinedGraphIndex.insert_index. |
1254 |
|
|
2592.5.4
by Martin Pool
Add CombinedGraphIndex repr |
1255 |
def __repr__(self): |
1256 |
return "%s(%s)" % ( |
|
1257 |
self.__class__.__name__, |
|
1258 |
', '.join(map(repr, self._indices))) |
|
1259 |
||
|
4744.2.6
by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member. |
1260 |
def clear_cache(self): |
1261 |
"""See GraphIndex.clear_cache()""" |
|
1262 |
for index in self._indices: |
|
1263 |
index.clear_cache() |
|
1264 |
||
|
3099.3.1
by John Arbash Meinel
Implement get_parent_map for ParentProviders |
1265 |
def get_parent_map(self, keys): |
|
4379.3.3
by Gary van der Merwe
Rename and add doc string for StackedParentsProvider. |
1266 |
"""See graph.StackedParentsProvider.get_parent_map""" |
|
3099.3.1
by John Arbash Meinel
Implement get_parent_map for ParentProviders |
1267 |
search_keys = set(keys) |
|
5753.2.2
by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports. |
1268 |
if _mod_revision.NULL_REVISION in search_keys: |
1269 |
search_keys.discard(_mod_revision.NULL_REVISION) |
|
1270 |
found_parents = {_mod_revision.NULL_REVISION:[]} |
|
|
3099.3.1
by John Arbash Meinel
Implement get_parent_map for ParentProviders |
1271 |
else: |
1272 |
found_parents = {} |
|
|
2979.2.2
by Robert Collins
Per-file graph heads detection during commit for pack repositories. |
1273 |
for index, key, value, refs in self.iter_entries(search_keys): |
1274 |
parents = refs[0] |
|
1275 |
if not parents: |
|
|
5753.2.2
by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports. |
1276 |
parents = (_mod_revision.NULL_REVISION,) |
|
2979.2.2
by Robert Collins
Per-file graph heads detection during commit for pack repositories. |
1277 |
found_parents[key] = parents |
|
3099.3.1
by John Arbash Meinel
Implement get_parent_map for ParentProviders |
1278 |
return found_parents |
|
2979.2.2
by Robert Collins
Per-file graph heads detection during commit for pack repositories. |
1279 |
|
|
6619.3.8
by Jelmer Vernooij
Cope with has_key -> contains rename. |
1280 |
__contains__ = _has_key_from_parent_map |
|
3830.3.9
by Martin Pool
Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests |
1281 |
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1282 |
def insert_index(self, pos, index, name=None): |
|
2592.1.37
by Robert Collins
Add CombinedGraphIndex.insert_index. |
1283 |
"""Insert a new index in the list of indices to query. |
1284 |
||
1285 |
:param pos: The position to insert the index.
|
|
1286 |
:param index: The index to insert.
|
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1287 |
:param name: a name for this index, e.g. a pack name. These names can
|
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1288 |
be used to reflect index reorderings to related CombinedGraphIndex
|
|
5086.7.6
by Andrew Bennetts
Add public set_sibling_indices API so that AggregateIndex doesn't have to poke at _sibling_indices. |
1289 |
instances that use the same names. (see set_sibling_indices)
|
|
2592.1.37
by Robert Collins
Add CombinedGraphIndex.insert_index. |
1290 |
"""
|
1291 |
self._indices.insert(pos, index) |
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1292 |
self._index_names.insert(pos, name) |
|
2592.1.37
by Robert Collins
Add CombinedGraphIndex.insert_index. |
1293 |
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1294 |
def iter_all_entries(self): |
1295 |
"""Iterate over all keys within the index |
|
1296 |
||
|
2592.1.44
by Robert Collins
Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review. |
1297 |
Duplicate keys across child indices are presumed to have the same
|
1298 |
value and are only reported once.
|
|
1299 |
||
|
2592.5.1
by Martin Pool
Fix docstrings for Index.iter_entries etc |
1300 |
:return: An iterable of (index, key, value, reference_lists).
|
1301 |
There is no defined order for the result iteration - it will be in
|
|
1302 |
the most efficient order for the index.
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1303 |
"""
|
1304 |
seen_keys = set() |
|
|
3789.1.5
by John Arbash Meinel
CombinedGraphIndex.iter_all_entries() can now reload when needed. |
1305 |
while True: |
1306 |
try: |
|
1307 |
for index in self._indices: |
|
1308 |
for node in index.iter_all_entries(): |
|
1309 |
if node[1] not in seen_keys: |
|
1310 |
yield node |
|
1311 |
seen_keys.add(node[1]) |
|
1312 |
return
|
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1313 |
except errors.NoSuchFile as e: |
1314 |
if not self._try_reload(e): |
|
1315 |
raise
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1316 |
|
1317 |
def iter_entries(self, keys): |
|
1318 |
"""Iterate over keys within the index. |
|
1319 |
||
|
2592.1.44
by Robert Collins
Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review. |
1320 |
Duplicate keys across child indices are presumed to have the same
|
1321 |
value and are only reported once.
|
|
1322 |
||
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1323 |
:param keys: An iterable providing the keys to be retrieved.
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1324 |
:return: An iterable of (index, key, value, reference_lists). There is
|
1325 |
no defined order for the result iteration - it will be in the most
|
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1326 |
efficient order for the index.
|
1327 |
"""
|
|
1328 |
keys = set(keys) |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1329 |
hit_indices = [] |
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1330 |
while True: |
1331 |
try: |
|
1332 |
for index in self._indices: |
|
1333 |
if not keys: |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1334 |
break
|
1335 |
index_hit = False |
|
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1336 |
for node in index.iter_entries(keys): |
1337 |
keys.remove(node[1]) |
|
1338 |
yield node |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1339 |
index_hit = True |
1340 |
if index_hit: |
|
1341 |
hit_indices.append(index) |
|
1342 |
break
|
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1343 |
except errors.NoSuchFile as e: |
1344 |
if not self._try_reload(e): |
|
1345 |
raise
|
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1346 |
self._move_to_front(hit_indices) |
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1347 |
|
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
1348 |
def iter_entries_prefix(self, keys): |
1349 |
"""Iterate over keys within the index using prefix matching. |
|
1350 |
||
1351 |
Duplicate keys across child indices are presumed to have the same
|
|
1352 |
value and are only reported once.
|
|
1353 |
||
1354 |
Prefix matching is applied within the tuple of a key, not within
|
|
1355 |
the bytestring of each key element. e.g. if you have the keys ('foo',
|
|
1356 |
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
|
|
1357 |
only the former key is returned.
|
|
1358 |
||
1359 |
:param keys: An iterable providing the key prefixes to be retrieved.
|
|
1360 |
Each key prefix takes the form of a tuple the length of a key, but
|
|
1361 |
with the last N elements 'None' rather than a regular bytestring.
|
|
1362 |
The first element cannot be 'None'.
|
|
1363 |
:return: An iterable as per iter_all_entries, but restricted to the
|
|
1364 |
keys with a matching prefix to those supplied. No additional keys
|
|
1365 |
will be returned, and every match that is in the index will be
|
|
1366 |
returned.
|
|
1367 |
"""
|
|
1368 |
keys = set(keys) |
|
1369 |
if not keys: |
|
1370 |
return
|
|
1371 |
seen_keys = set() |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1372 |
hit_indices = [] |
|
3789.1.6
by John Arbash Meinel
CombinedGraphIndex.iter_entries_prefix can now reload when needed. |
1373 |
while True: |
1374 |
try: |
|
1375 |
for index in self._indices: |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1376 |
index_hit = False |
|
3789.1.6
by John Arbash Meinel
CombinedGraphIndex.iter_entries_prefix can now reload when needed. |
1377 |
for node in index.iter_entries_prefix(keys): |
1378 |
if node[1] in seen_keys: |
|
1379 |
continue
|
|
1380 |
seen_keys.add(node[1]) |
|
1381 |
yield node |
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1382 |
index_hit = True |
1383 |
if index_hit: |
|
1384 |
hit_indices.append(index) |
|
1385 |
break
|
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1386 |
except errors.NoSuchFile as e: |
1387 |
if not self._try_reload(e): |
|
1388 |
raise
|
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1389 |
self._move_to_front(hit_indices) |
1390 |
||
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1391 |
def _move_to_front(self, hit_indices): |
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1392 |
"""Rearrange self._indices so that hit_indices are first. |
1393 |
||
1394 |
Order is maintained as much as possible, e.g. the first unhit index
|
|
1395 |
will be the first index in _indices after the hit_indices, and the
|
|
1396 |
hit_indices will be present in exactly the order they are passed to
|
|
1397 |
_move_to_front.
|
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1398 |
|
1399 |
_move_to_front propagates to all objects in self._sibling_indices by
|
|
1400 |
calling _move_to_front_by_name.
|
|
|
5086.7.1
by Andrew Bennetts
Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. |
1401 |
"""
|
|
5151.2.1
by John Arbash Meinel
Avoid reordering when unnecessary. Fixes bug #562429 |
1402 |
if self._indices[:len(hit_indices)] == hit_indices: |
1403 |
# The 'hit_indices' are already at the front (and in the same
|
|
1404 |
# order), no need to re-order
|
|
1405 |
return
|
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1406 |
hit_names = self._move_to_front_by_index(hit_indices) |
1407 |
for sibling_idx in self._sibling_indices: |
|
1408 |
sibling_idx._move_to_front_by_name(hit_names) |
|
1409 |
||
1410 |
def _move_to_front_by_index(self, hit_indices): |
|
1411 |
"""Core logic for _move_to_front. |
|
1412 |
|
|
1413 |
Returns a list of names corresponding to the hit_indices param.
|
|
1414 |
"""
|
|
|
5151.2.3
by John Arbash Meinel
Restore the indices_info variable. |
1415 |
indices_info = zip(self._index_names, self._indices) |
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1416 |
if 'index' in debug.debug_flags: |
|
6631.2.1
by Martin
Run 2to3 zip fixer and refactor |
1417 |
indices_info = list(indices_info) |
|
5753.2.2
by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports. |
1418 |
trace.mutter('CombinedGraphIndex reordering: currently %r, ' |
1419 |
'promoting %r', indices_info, hit_indices) |
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1420 |
hit_names = [] |
|
5151.2.2
by John Arbash Meinel
Avoid packing and unpacking the indices, and shortcut once you've found all |
1421 |
unhit_names = [] |
1422 |
new_hit_indices = [] |
|
1423 |
unhit_indices = [] |
|
1424 |
||
|
5151.2.3
by John Arbash Meinel
Restore the indices_info variable. |
1425 |
for offset, (name, idx) in enumerate(indices_info): |
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1426 |
if idx in hit_indices: |
|
5151.2.4
by John Arbash Meinel
Minor tweak |
1427 |
hit_names.append(name) |
|
5151.2.2
by John Arbash Meinel
Avoid packing and unpacking the indices, and shortcut once you've found all |
1428 |
new_hit_indices.append(idx) |
1429 |
if len(new_hit_indices) == len(hit_indices): |
|
1430 |
# We've found all of the hit entries, everything else is
|
|
1431 |
# unhit
|
|
1432 |
unhit_names.extend(self._index_names[offset+1:]) |
|
1433 |
unhit_indices.extend(self._indices[offset+1:]) |
|
1434 |
break
|
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1435 |
else: |
|
5151.2.2
by John Arbash Meinel
Avoid packing and unpacking the indices, and shortcut once you've found all |
1436 |
unhit_names.append(name) |
1437 |
unhit_indices.append(idx) |
|
1438 |
||
1439 |
self._indices = new_hit_indices + unhit_indices |
|
1440 |
self._index_names = hit_names + unhit_names |
|
|
5086.7.4
by Andrew Bennetts
Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters. |
1441 |
if 'index' in debug.debug_flags: |
|
5753.2.2
by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports. |
1442 |
trace.mutter('CombinedGraphIndex reordered: %r', self._indices) |
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1443 |
return hit_names |
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1444 |
|
1445 |
def _move_to_front_by_name(self, hit_names): |
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1446 |
"""Moves indices named by 'hit_names' to front of the search order, as |
1447 |
described in _move_to_front.
|
|
1448 |
"""
|
|
1449 |
# Translate names to index instances, and then call
|
|
1450 |
# _move_to_front_by_index.
|
|
|
5086.7.2
by Andrew Bennetts
Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. |
1451 |
indices_info = zip(self._index_names, self._indices) |
1452 |
hit_indices = [] |
|
1453 |
for name, idx in indices_info: |
|
1454 |
if name in hit_names: |
|
1455 |
hit_indices.append(idx) |
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1456 |
self._move_to_front_by_index(hit_indices) |
|
2624.2.9
by Robert Collins
Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. |
1457 |
|
|
4593.4.12
by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() |
1458 |
def find_ancestry(self, keys, ref_list_num): |
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1459 |
"""Find the complete ancestry for the given set of keys. |
1460 |
||
|
4593.4.12
by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() |
1461 |
Note that this is a whole-ancestry request, so it should be used
|
1462 |
sparingly.
|
|
1463 |
||
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1464 |
:param keys: An iterable of keys to look for
|
|
4593.4.12
by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() |
1465 |
:param ref_list_num: The reference list which references the parents
|
1466 |
we care about.
|
|
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1467 |
:return: (parent_map, missing_keys)
|
1468 |
"""
|
|
|
5086.7.3
by Andrew Bennetts
Improve docstrings and refactor slightly for clarity. |
1469 |
# XXX: make this call _move_to_front?
|
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1470 |
missing_keys = set() |
1471 |
parent_map = {} |
|
1472 |
keys_to_lookup = set(keys) |
|
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1473 |
generation = 0 |
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1474 |
while keys_to_lookup: |
1475 |
# keys that *all* indexes claim are missing, stop searching them
|
|
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1476 |
generation += 1 |
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1477 |
all_index_missing = None |
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1478 |
# print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
|
1479 |
# print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
|
|
1480 |
# len(parent_map),
|
|
1481 |
# len(missing_keys))
|
|
1482 |
for index_idx, index in enumerate(self._indices): |
|
1483 |
# TODO: we should probably be doing something with
|
|
1484 |
# 'missing_keys' since we've already determined that
|
|
1485 |
# those revisions have not been found anywhere
|
|
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1486 |
index_missing_keys = set() |
1487 |
# Find all of the ancestry we can from this index
|
|
1488 |
# keep looking until the search_keys set is empty, which means
|
|
1489 |
# things we didn't find should be in index_missing_keys
|
|
1490 |
search_keys = keys_to_lookup |
|
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1491 |
sub_generation = 0 |
1492 |
# print ' \t%2d\t\t%4d\t%5d\t%5d' % (
|
|
1493 |
# index_idx, len(search_keys),
|
|
1494 |
# len(parent_map), len(index_missing_keys))
|
|
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1495 |
while search_keys: |
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1496 |
sub_generation += 1 |
1497 |
# TODO: ref_list_num should really be a parameter, since
|
|
1498 |
# CombinedGraphIndex does not know what the ref lists
|
|
1499 |
# mean.
|
|
|
4593.4.12
by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() |
1500 |
search_keys = index._find_ancestors(search_keys, |
1501 |
ref_list_num, parent_map, index_missing_keys) |
|
|
4593.4.9
by John Arbash Meinel
Add some debugging statements for now. |
1502 |
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
|
1503 |
# sub_generation, len(search_keys),
|
|
1504 |
# len(parent_map), len(index_missing_keys))
|
|
|
4593.4.8
by John Arbash Meinel
Implement CombinedGraphIndex.get_ancestry() |
1505 |
# Now set whatever was missing to be searched in the next index
|
1506 |
keys_to_lookup = index_missing_keys |
|
1507 |
if all_index_missing is None: |
|
1508 |
all_index_missing = set(index_missing_keys) |
|
1509 |
else: |
|
1510 |
all_index_missing.intersection_update(index_missing_keys) |
|
1511 |
if not keys_to_lookup: |
|
1512 |
break
|
|
1513 |
if all_index_missing is None: |
|
1514 |
# There were no indexes, so all search keys are 'missing'
|
|
1515 |
missing_keys.update(keys_to_lookup) |
|
1516 |
keys_to_lookup = None |
|
1517 |
else: |
|
1518 |
missing_keys.update(all_index_missing) |
|
1519 |
keys_to_lookup.difference_update(all_index_missing) |
|
1520 |
return parent_map, missing_keys |
|
1521 |
||
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
1522 |
def key_count(self): |
1523 |
"""Return an estimate of the number of keys in this index. |
|
|
3789.1.3
by John Arbash Meinel
CombinedGraphIndex can now reload when calling key_count(). |
1524 |
|
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
1525 |
For CombinedGraphIndex this is approximated by the sum of the keys of
|
1526 |
the child indices. As child indices may have duplicate keys this can
|
|
1527 |
have a maximum error of the number of child indices * largest number of
|
|
1528 |
keys in any index.
|
|
1529 |
"""
|
|
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1530 |
while True: |
|
3789.1.3
by John Arbash Meinel
CombinedGraphIndex can now reload when calling key_count(). |
1531 |
try: |
1532 |
return sum((index.key_count() for index in self._indices), 0) |
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1533 |
except errors.NoSuchFile as e: |
1534 |
if not self._try_reload(e): |
|
1535 |
raise
|
|
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1536 |
|
|
3830.3.12
by Martin Pool
Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks |
1537 |
missing_keys = _missing_keys_from_parent_map |
1538 |
||
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1539 |
def _try_reload(self, error): |
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1540 |
"""We just got a NoSuchFile exception. |
1541 |
||
1542 |
Try to reload the indices. Return True if reloading changed anything, or
|
|
1543 |
False if it did not, in which case the caller re-raises the exception.
|
|
1544 |
"""
|
|
1545 |
if self._reload_func is None: |
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1546 |
return False |
1547 |
trace.mutter('Trying to reload after getting exception: %s', error) |
|
|
3789.1.4
by John Arbash Meinel
CombinedGraphIndex.iter_entries() is now able to reload on request. |
1548 |
if not self._reload_func(): |
1549 |
# We tried to reload, but nothing changed, so we fail anyway
|
|
|
3789.1.10
by John Arbash Meinel
Review comments from Martin. |
1550 |
trace.mutter('_reload_func indicated nothing has changed.' |
1551 |
' Raising original exception.') |
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1552 |
return False |
1553 |
return True |
|
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
1554 |
|
|
5086.7.6
by Andrew Bennetts
Add public set_sibling_indices API so that AggregateIndex doesn't have to poke at _sibling_indices. |
1555 |
def set_sibling_indices(self, sibling_combined_graph_indices): |
1556 |
"""Set the CombinedGraphIndex objects to reorder after reordering self. |
|
1557 |
"""
|
|
1558 |
self._sibling_indices = sibling_combined_graph_indices |
|
1559 |
||
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
1560 |
def validate(self): |
1561 |
"""Validate that everything in the index can be accessed.""" |
|
|
3789.1.7
by John Arbash Meinel
CombinedGraphIndex.validate() will now reload. |
1562 |
while True: |
1563 |
try: |
|
1564 |
for index in self._indices: |
|
1565 |
index.validate() |
|
1566 |
return
|
|
|
6621.16.1
by Martin
Make _reload_or_raise into _try_reload and have callers reraise |
1567 |
except errors.NoSuchFile as e: |
1568 |
if not self._try_reload(e): |
|
1569 |
raise
|
|
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1570 |
|
1571 |
||
1572 |
class InMemoryGraphIndex(GraphIndexBuilder): |
|
1573 |
"""A GraphIndex which operates entirely out of memory and is mutable. |
|
1574 |
||
1575 |
This is designed to allow the accumulation of GraphIndex entries during a
|
|
1576 |
single write operation, where the accumulated entries need to be immediately
|
|
1577 |
available - for example via a CombinedGraphIndex.
|
|
1578 |
"""
|
|
1579 |
||
1580 |
def add_nodes(self, nodes): |
|
1581 |
"""Add nodes to the index. |
|
1582 |
||
1583 |
:param nodes: An iterable of (key, value, node_refs) entries to add, or (key, value) entries when the index has no reference lists.
|
|
1584 |
"""
|
|
|
2592.3.39
by Robert Collins
Fugly version to remove signatures.kndx |
1585 |
if self.reference_lists: |
1586 |
for (key, value, node_refs) in nodes: |
|
1587 |
self.add_node(key, value, node_refs) |
|
1588 |
else: |
|
1589 |
for (key, value) in nodes: |
|
1590 |
self.add_node(key, value) |
|
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1591 |
|
1592 |
def iter_all_entries(self): |
|
1593 |
"""Iterate over all keys within the index |
|
1594 |
||
|
2592.5.1
by Martin Pool
Fix docstrings for Index.iter_entries etc |
1595 |
:return: An iterable of (index, key, value, reference_lists). There is no
|
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1596 |
defined order for the result iteration - it will be in the most
|
1597 |
efficient order for the index (in this case dictionary hash order).
|
|
1598 |
"""
|
|
|
2745.1.1
by Robert Collins
Add a number of -Devil checkpoints. |
1599 |
if 'evil' in debug.debug_flags: |
|
2592.3.112
by Robert Collins
Various fixups found dogfooding. |
1600 |
trace.mutter_callsite(3, |
|
2745.1.2
by Robert Collins
Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. |
1601 |
"iter_all_entries scales with size of history.") |
|
2592.1.46
by Robert Collins
Make GraphIndex accept nodes as key, value, references, so that the method |
1602 |
if self.reference_lists: |
|
6656.1.1
by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers |
1603 |
for key, (absent, references, value) in viewitems(self._nodes): |
|
2592.1.46
by Robert Collins
Make GraphIndex accept nodes as key, value, references, so that the method |
1604 |
if not absent: |
|
2624.2.14
by Robert Collins
Add source index to the index iteration API to allow mapping back to the origin of retrieved data. |
1605 |
yield self, key, value, references |
|
2592.1.46
by Robert Collins
Make GraphIndex accept nodes as key, value, references, so that the method |
1606 |
else: |
|
6656.1.1
by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers |
1607 |
for key, (absent, references, value) in viewitems(self._nodes): |
|
2592.1.46
by Robert Collins
Make GraphIndex accept nodes as key, value, references, so that the method |
1608 |
if not absent: |
|
2624.2.14
by Robert Collins
Add source index to the index iteration API to allow mapping back to the origin of retrieved data. |
1609 |
yield self, key, value |
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1610 |
|
1611 |
def iter_entries(self, keys):
    """Yield the index entries stored for the requested keys.

    Keys that are not in the index, and keys that are only recorded as
    absent placeholders, are silently skipped.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists) when the
        index has reference lists, otherwise of (index, key, value).
        There is no defined order for the result iteration - entries come
        back in the order the keys were supplied.
    """
    # Note: See BTreeBuilder.iter_entries for an explanation of why we
    #       aren't using set().intersection() here
    node_map = self._nodes
    wanted = [candidate for candidate in keys if candidate in node_map]
    include_refs = self.reference_lists
    for found in wanted:
        # Each stored node is an (absent, references, value) tuple.
        absent, references, value = node_map[found]
        if absent:
            continue
        if include_refs:
            yield self, found, value, references
        else:
            yield self, found, value
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1633 |
|
|
2624.2.10
by Robert Collins
Also add iter_key_prefix support to InMemoryGraphIndex. |
1634 |
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned. A prefix that matches nothing yields nothing.
    :raises BadIndexKey: If a prefix starts with None or does not have the
        index's key length.
    """
    keys = set(keys)
    if not keys:
        return
    if self._key_length == 1:
        # With single-element keys a valid prefix is always a full key, so
        # this is a plain dictionary lookup.
        nodes = self._nodes
        for key in keys:
            _sanity_check_key(self, key)
            try:
                node = nodes[key]
            except KeyError:
                # Nothing in the index matches this prefix. Skip it, as the
                # multi-element path does, rather than leaking KeyError.
                continue
            if node[0]:
                # Only recorded as an absent placeholder, not a real entry.
                continue
            if self.reference_lists:
                yield self, key, node[2], node[1]
            else:
                yield self, key, node[2]
        return
    nodes_by_key = self._get_nodes_by_key()
    for entry in _iter_entries_prefix(self, nodes_by_key, keys):
        yield entry
|
|
2624.2.10
by Robert Collins
Also add iter_key_prefix support to InMemoryGraphIndex. |
1668 |
|
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
1669 |
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For InMemoryGraphIndex the estimate is exact: it is the number of
    stored nodes minus those that are only absent placeholders.
    """
    stored = len(self._nodes)
    placeholders = len(self._absent_keys)
    return stored - placeholders
|
2624.2.16
by Robert Collins
Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. |
1675 |
|
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
1676 |
def validate(self):
    """Check the index for corruption (there is nothing to check).

    In memory indexes have no known corruption at the moment, so this
    does nothing.
    """
    return None
|
|
2624.2.12
by Robert Collins
Create an adapter between indices with differing key lengths. |
1678 |
|
1679 |
||
1680 |
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: Tuple of key elements prepended to every key and
            reference sent to adapted, and stripped from what it returns.
        :param missing_key_length: How many None elements to append to
            prefix to build the prefix key used by iter_all_entries.
        :param add_nodes_callback: Optional callable that receives the list
            of prefixed nodes built by add_nodes.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no references.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        try:
            # Add prefix_key to each reference node_refs is a tuple of tuples,
            # so split it apart, and add prefix_key to the internal reference
            translated_nodes = []
            for (key, value, node_refs) in nodes:
                adjusted_references = tuple(
                    tuple(self.prefix + ref_node for ref_node in ref_list)
                    for ref_list in node_refs)
                translated_nodes.append((self.prefix + key, value,
                                         adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference
            # list status, to handle this bit of user-friendliness in the API
            # more explicitly.
            # The nodes did not unpack as triples, so treat them as
            # reference-less (key, value) pairs and rebuild from scratch.
            translated_nodes = [(self.prefix + key, value)
                                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of (index, key, value) or
            (index, key, value, reference_lists) entries from the adapted
            index.
        :return: A generator of the same entries with the prefix removed
            from the key and from every referenced key.
        :raises BadIndexData: If a key or reference does not start with
            the prefix.
        """
        prefix = self.prefix
        prefix_len = self.prefix_len
        for node in an_iter:
            # cross checks
            full_key = node[1]
            if full_key[:prefix_len] != prefix:
                raise errors.BadIndexData(self)
            stripped_key = full_key[prefix_len:]
            if len(node) == 3:
                # The adapted index has no reference lists, so there is
                # nothing further to translate.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:prefix_len] != prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], tuple(
                tuple(ref_node[prefix_len:] for ref_node in ref_list)
                for ref_list in node[3])

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it will be in
            whatever order the adapted index produces.
        """
        return self._strip_prefix(
            self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it will be in
            whatever order the adapted index produces.
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count lazily rather than materialising every entry in a list.
        return sum(1 for _ in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()
|
|
6654.1.1
by Martin
Factor out some copycode in iter_entries_prefix implementations |
1805 |
|
1806 |
||
1807 |
def _sanity_check_key(index_or_builder, key):
    """Reject keys that cannot be used for prefix matching.

    :param index_or_builder: Object whose _key_length gives the required
        number of key elements.
    :param key: The key prefix tuple to check.
    :raises BadIndexKey: If the first element of key is None, or key does
        not have exactly _key_length elements.
    """
    # The first-element test runs before the length test.
    if key[0] is None or len(key) != index_or_builder._key_length:
        raise errors.BadIndexKey(key)
|
1813 |
||
1814 |
||
1815 |
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
    """Helper for implementing prefix matching iterators.

    :param index_or_builder: The object yielded as the first element of
        every entry; also used to sanity check each key.
    :param nodes_by_key: Nested dicts keyed by successive key elements,
        with tuples at the leaves.
    :param keys: The key prefixes to look up; elements from the first None
        onwards are treated as wildcards.
    :return: A generator of (index_or_builder,) + leaf tuple for every
        leaf matching one of the prefixes.
    """
    for key in keys:
        _sanity_check_key(index_or_builder, key)
        # Walk down the nested dicts for as long as the prefix names
        # concrete elements.
        subtree = nodes_by_key
        matched = 0
        try:
            for element in key:
                if element is None:
                    break
                subtree = subtree[element]
                matched += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if matched == len(key):
            # every element was concrete, so the walk ended on a leaf
            yield (index_or_builder, ) + subtree
            continue
        # Wildcard remainder: return everything below the subdict reached.
        pending = [subtree]
        while pending:
            children = viewvalues(pending.pop())
            # can't be empty or would not exist
            sample = next(iter(children))
            if isinstance(sample, dict):
                # still descending, push values
                pending.extend(children)
            else:
                # at leaf tuples, each one is ready to yield
                for leaf in children:
                    yield (index_or_builder, ) + leaf