bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4763.2.4
by John Arbash Meinel
 merge bzr.2.1 in preparation for NEWS entry.  | 
1  | 
# Copyright (C) 2007-2010 Canonical Ltd
 | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
4183.7.1
by Sabin Iacob
 update FSF mailing address  | 
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
16  | 
|
17  | 
"""Indexing facilities."""
 | 
|
18  | 
||
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
19  | 
__all__ = [  | 
20  | 
'CombinedGraphIndex',  | 
|
21  | 
'GraphIndex',  | 
|
22  | 
'GraphIndexBuilder',  | 
|
| 
2624.2.12
by Robert Collins
 Create an adapter between indices with differing key lengths.  | 
23  | 
'GraphIndexPrefixAdapter',  | 
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
24  | 
'InMemoryGraphIndex',  | 
25  | 
    ]
 | 
|
| 
2592.1.32
by Robert Collins
 Add __all__ to index.  | 
26  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
27  | 
from bisect import bisect_right  | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
28  | 
from cStringIO import StringIO  | 
| 
2592.1.12
by Robert Collins
 Handle basic node adds.  | 
29  | 
import re  | 
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
30  | 
import sys  | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
31  | 
|
| 
2624.2.15
by Robert Collins
 Add useful -Dindex flag.  | 
32  | 
from bzrlib.lazy_import import lazy_import  | 
33  | 
lazy_import(globals(), """  | 
|
| 
2745.1.2
by Robert Collins
 Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.  | 
34  | 
from bzrlib import trace
 | 
| 
2890.2.7
by Robert Collins
 * Pack indices are now partially parsed for specific key lookup using a  | 
35  | 
from bzrlib.bisect_multi import bisect_multi_bytes
 | 
| 
2979.2.2
by Robert Collins
 Per-file graph heads detection during commit for pack repositories.  | 
36  | 
from bzrlib.revision import NULL_REVISION
 | 
| 
2745.1.2
by Robert Collins
 Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.  | 
37  | 
from bzrlib.trace import mutter
 | 
| 
2624.2.15
by Robert Collins
 Add useful -Dindex flag.  | 
38  | 
""")  | 
| 
3099.3.3
by John Arbash Meinel
 Deprecate get_parents() in favor of get_parent_map()  | 
39  | 
from bzrlib import (  | 
40  | 
debug,  | 
|
41  | 
errors,  | 
|
42  | 
    )
 | 
|
| 
4679.8.3
by John Arbash Meinel
 Expose bzrlib.static_tuple.StaticTuple as a thunk  | 
43  | 
from bzrlib.static_tuple import StaticTuple  | 
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
44  | 
|
| 
2979.1.1
by Robert Collins
 Use the GraphIndex header to answer key_count queries rather than parsing the entire index unnecessarily.  | 
45  | 
# (offset, length) pair; presumably the readv request used to fetch just the
# index header -- TODO confirm against the reader code that uses it.
_HEADER_READV = (0, 200)
# Prefixes of the option lines that follow the signature in a serialised
# index.  GraphIndexBuilder.finish() writes them in the order
# node_ref_lists, key_elements, len.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_LEN = "len="
_OPTION_NODE_REFS = "node_ref_lists="
# First line of every serialised index.
_SIGNATURE = "Bazaar Graph Index 1\n"


# Characters that may not appear in a key element (whitespace and NUL); used
# by GraphIndexBuilder._check_key().
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Characters that may not appear in a node value (newline and NUL); used by
# GraphIndexBuilder._check_key_ref_value().
_newline_null_re = re.compile('[\n\0]')
54  | 
||
55  | 
||
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
56  | 
def _has_key_from_parent_map(self, key):  | 
57  | 
"""Check if this index has one key.  | 
|
58  | 
||
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
59  | 
    If it's possible to check for multiple keys at once through
 | 
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
60  | 
    calling get_parent_map that should be faster.
 | 
61  | 
    """
 | 
|
62  | 
return (key in self.get_parent_map([key]))  | 
|
63  | 
||
| 
3830.3.20
by John Arbash Meinel
 Minor PEP8 and copyright updates.  | 
64  | 
|
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
65  | 
def _missing_keys_from_parent_map(self, keys):  | 
66  | 
return set(keys) - set(self.get_parent_map(keys))  | 
|
67  | 
||
68  | 
||
| 
2592.1.4
by Robert Collins
 Create a GraphIndexBuilder.  | 
69  | 
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := NODE_REF_LISTS KEY_ELEMENTS LEN
    NODE_REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE
    KEY_ELEMENTS   := 'key_elements=' DIGITS NEWLINE
    LEN            := 'len=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
    KEY_ELEMENT    := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """
        self.reference_lists = reference_lists
        # A dict of {key: (absent, ref_lists, value)}
        self._nodes = {}
        # Keys that are referenced but not actually present in this index
        self._absent_keys = set()
        # Nested-dict view of the nodes, built lazily by _get_nodes_by_key().
        self._nodes_by_key = None
        self._key_length = key_elements
        self._optimize_for_size = False
        self._combine_backing_indices = True

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index.

        A valid key is a tuple or StaticTuple with exactly as many elements
        as this index was created with, where every element is non-empty and
        free of whitespace and NUL characters.
        """
        if type(key) not in (tuple, StaticTuple):
            raise errors.BadIndexKey(key)
        if self._key_length != len(key):
            raise errors.BadIndexKey(key)
        for element in key:
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def _external_references(self):
        """Return references that are not present in this index.

        NOTE(review): this calls self.iter_all_entries(), which this class
        does not define in the code visible here -- presumably a subclass
        supplies it.

        :return: A set of keys found in the second reference list of some
            entry that are not themselves entries.  Always empty when there
            are fewer than two reference lists.
        """
        keys = set()
        refs = set()
        # TODO: JAM 2008-11-21 This makes an assumption about how the reference
        #       lists are used. It is currently correct for pack-0.92 through
        #       1.9, which use the node references (3rd column) second
        #       reference list as the compression parent. Perhaps this should
        #       be moved into something higher up the stack, since it
        #       makes assumptions about how the index is used.
        if self.reference_lists > 1:
            for node in self.iter_all_entries():
                keys.add(node[1])
                refs.update(node[3][1])
            return refs - keys
        else:
            # If reference_lists == 0 there can be no external references, and
            # if reference_lists == 1, then there isn't a place to store the
            # compression parent
            return set()

    def _get_nodes_by_key(self):
        """Return the nested-dict view of the nodes, building it if needed.

        For a key of (foo, bar, baz) the entry is stored at
        result[foo][bar][baz].  Each entry is (key, value, references) when
        the index has reference lists and (key, value) otherwise.  Absent
        nodes are left out.  The result is cached on the builder.
        """
        if self._nodes_by_key is None:
            nodes_by_key = {}
            with_references = bool(self.reference_lists)
            for key, (absent, references, value) in self._nodes.iteritems():
                if absent:
                    continue
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                if with_references:
                    key_dict[key[-1]] = key, value, references
                else:
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key

    def _update_nodes_by_key(self, key, value, node_refs):
        """Update the _nodes_by_key dict with a new key.

        For a key of (foo, bar, baz) create
        _nodes_by_key[foo][bar][baz] = key_value

        Does nothing when the nested-dict view has not been built yet.
        """
        if self._nodes_by_key is None:
            return
        key_dict = self._nodes_by_key
        if self.reference_lists:
            key_value = StaticTuple(key, value, node_refs)
        else:
            key_value = StaticTuple(key, value)
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = key_value

    def _check_key_ref_value(self, key, references, value):
        """Check that 'key' and 'references' are all valid.

        :param key: A key tuple. Must conform to the key interface (be a tuple,
            be of the right length, not have any whitespace or nulls in any key
            element.)
        :param references: An iterable of reference lists. Something like
            [[(ref, key)], [(ref, key), (other, key)]]
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)
            node_refs   basically a packed form of 'references' where all
                        iterables are tuples
            absent_references   reference keys that are not in self._nodes.
                                This may contain duplicates if the same key is
                                referenced in multiple lists.
        :raises BadIndexKey: if the key or an unknown reference is invalid.
        :raises BadIndexValue: if the value contains a newline or NUL, or the
            number of reference lists differs from self.reference_lists.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
            for reference in reference_list:
                # If reference *is* in self._nodes, then we know it has already
                # been checked.
                if reference not in self._nodes:
                    self._check_key(reference)
                    absent_references.append(reference)
            reference_list = as_st([as_st(ref).intern()
                                    for ref in reference_list])
            node_refs.append(reference_list)
        return as_st(node_refs), absent_references

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain NUL or newline characters.
        :raises BadIndexDuplicateKey: if the key has already been added as a
            real (non-absent) node.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        # A key that is only known as an absent placeholder may be replaced.
        if key in self._nodes and self._nodes[key][0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = ('a', (), '')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        This is a no-op, but we need the api to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        """Serialise all nodes added so far.

        :return: A StringIO holding the complete index: signature, option
            lines, one line per node (sorted by key) and a final newline.
        :raises BzrError: if reference lists are in use and the output length
            differs from the length computed while assigning byte offsets.
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # file cannot be determined until all the inbetween references have
        # been calculated too) we pad the offsets with 0's to make them be
        # of consistent length. Using binary offsets would break the trivial
        # file parsing.
        # to calculate the width of zero's needed we do three passes:
        # one to gather all the non-reference data and the number of references.
        # one to pad all the data with reference-length and determine entry
        # addresses.
        # One to serialise.

        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        nodes = sorted(self._nodes.items())
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # record the offset known *so far* for this key:
                # the non reference bytes to date, and the total references to
                # date - saves reaccumulating on the second pass
                key_offset_info.append((key, non_ref_bytes, total_references))
                # key is literal, value is literal, there are 3 null's, 1 NL
                # key is variable length tuple, \x00 between elements
                non_ref_bytes += sum(len(element) for element in key)
                if self._key_length > 1:
                    non_ref_bytes += self._key_length - 1
                # value is literal bytes, there are 3 null's, 1 NL.
                non_ref_bytes += len(value) + 3 + 1
                # one byte for absent if set.
                if absent:
                    non_ref_bytes += 1
                else:
                    # (ref_lists -1) tabs
                    non_ref_bytes += self.reference_lists - 1
                    # (ref-1 cr's per ref_list)
                    for ref_list in references:
                        # how many references across the whole file?
                        total_references += len(ref_list)
                        # accrue reference separators
                        if ref_list:
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1 # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, bytes_before, references_before in key_offset_info:
                key_addresses[key] = bytes_before + references_before*digits
            # serialise
            format_string = '%%0%sd' % digits
        for key, (absent, references, value) in nodes:
            # 'references' is empty when there are no reference lists, so
            # format_string and key_addresses are only touched when defined.
            flattened_references = []
            for ref_list in references:
                ref_addresses = [format_string % key_addresses[reference]
                                 for reference in ref_list]
                flattened_references.append('\r'.join(ref_addresses))
            string_key = '\x00'.join(key)
            lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        # Join once and measure the string itself rather than asking the
        # StringIO for its value repeatedly.
        serialised = ''.join(lines)
        if expected_bytes and len(serialised) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(serialised), expected_bytes))
        return StringIO(serialised)

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        :param for_size: Tell the builder to try and make the index as small as
            possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        :return: None
        """
        # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
        # other builders do.
        # None means "leave this setting unchanged".
        if for_size is not None:
            self._optimize_for_size = for_size
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()

        Walks outwards from 'keys', following the reference list numbered
        'ref_list_num' of every entry found, until nothing new is reached.

        NOTE(review): this calls self.iter_entries(), which this class does
        not define in the code visible here -- presumably a subclass supplies
        it.

        :param keys: The keys to start from.
        :param ref_list_num: Which reference list holds the parent keys.
        :return: (parent_map, missing_keys) where parent_map maps every key
            found to its parent keys and missing_keys is the set of requested
            or referenced keys for which no entry was yielded.
        """
        pending = set(keys)
        parent_map = {}
        missing_keys = set()
        while pending:
            next_pending = set()
            for _, key, value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                parent_map[key] = parent_keys
                next_pending.update([p for p in parent_keys if p not in
                                     parent_map])
            # Anything asked for in this round that yielded no entry.
            missing_keys.update(pending.difference(parent_map))
            pending = next_pending
        return parent_map, missing_keys
|
365  | 
||
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
366  | 
|
367  | 
class GraphIndex(object):  | 
|
368  | 
"""An index for data with embedded graphs.  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
369  | 
|
| 
2592.1.10
by Robert Collins
 Make validate detect node reference parsing errors.  | 
370  | 
    The index maps keys to a list of key reference lists, and a value.
 | 
371  | 
    Each node has the same number of key reference lists. Each key reference
 | 
|
372  | 
    list can be empty or an arbitrary length. The value is an opaque NULL
 | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
373  | 
    terminated string without any newlines. The storage of the index is
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
374  | 
    hidden in the interface: keys and key references are always tuples of
 | 
375  | 
    bytestrings, never the internal representation (e.g. dictionary offsets).
 | 
|
| 
2592.1.30
by Robert Collins
 Absent entries are not yeilded.  | 
376  | 
|
377  | 
    It is presumed that the index will not be mutated - it is static data.
 | 
|
| 
2592.1.34
by Robert Collins
 Cleanup docs.  | 
378  | 
|
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
379  | 
    Successive iter_all_entries calls will read the entire index each time.
 | 
380  | 
    Additionally, iter_entries calls will read the index linearly until the
 | 
|
381  | 
    desired keys are found. XXX: This must be fixed before the index is
 | 
|
| 
2592.1.34
by Robert Collins
 Cleanup docs.  | 
382  | 
    suitable for production use. :XXX
 | 
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
383  | 
    """
 | 
384  | 
||
| 
5074.4.2
by John Arbash Meinel
 Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.  | 
385  | 
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):  | 
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
386  | 
"""Open an index called name on transport.  | 
387  | 
||
388  | 
        :param transport: A bzrlib.transport.Transport.
 | 
|
389  | 
        :param name: A path to provide to transport API calls.
 | 
|
| 
2890.2.1
by Robert Collins
 * ``bzrlib.index.GraphIndex`` now requires a size parameter to the  | 
390  | 
        :param size: The size of the index in bytes. This is used for bisection
 | 
391  | 
            logic to perform partial index reads. While the size could be
 | 
|
392  | 
            obtained by statting the file this introduced an additional round
 | 
|
| 
2890.2.8
by Robert Collins
 Make the size of the index optionally None for the pack-names index.  | 
393  | 
            trip as well as requiring stat'able transports, both of which are
 | 
394  | 
            avoided by having it supplied. If size is None, then bisection
 | 
|
395  | 
            support will be disabled and accessing the index will just stream
 | 
|
396  | 
            all the data.
 | 
|
| 
5074.4.2
by John Arbash Meinel
 Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.  | 
397  | 
        :param offset: Instead of starting the index data at offset 0, start it
 | 
398  | 
            at an arbitrary offset.
 | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
399  | 
        """
 | 
400  | 
self._transport = transport  | 
|
401  | 
self._name = name  | 
|
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
402  | 
        # Becomes a dict of key:(value, reference-list-byte-locations) used by
 | 
403  | 
        # the bisection interface to store parsed but not resolved keys.
 | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
404  | 
self._bisect_nodes = None  | 
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
405  | 
        # Becomes a dict of key:(value, reference-list-keys) which are ready to
 | 
406  | 
        # be returned directly to callers.
 | 
|
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
407  | 
self._nodes = None  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
408  | 
        # a sorted list of slice-addresses for the parsed bytes of the file.
 | 
409  | 
        # e.g. (0,1) would mean that byte 0 is parsed.
 | 
|
| 
2890.2.2
by Robert Collins
 Opening an index creates a map for the parsed bytes.  | 
410  | 
self._parsed_byte_map = []  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
411  | 
        # a sorted list of keys matching each slice address for parsed bytes
 | 
412  | 
        # e.g. (None, 'foo@bar') would mean that the first byte contained no
 | 
|
413  | 
        # key, and the end byte of the slice is the of the data for 'foo@bar'
 | 
|
414  | 
self._parsed_key_map = []  | 
|
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
415  | 
self._key_count = None  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
416  | 
self._keys_by_offset = None  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
417  | 
self._nodes_by_key = None  | 
| 
2890.2.1
by Robert Collins
 * ``bzrlib.index.GraphIndex`` now requires a size parameter to the  | 
418  | 
self._size = size  | 
| 
3665.3.3
by John Arbash Meinel
 If we read more than 50% of the whole index,  | 
419  | 
        # The number of bytes we've read so far in trying to process this file
 | 
420  | 
self._bytes_read = 0  | 
|
| 
5074.4.2
by John Arbash Meinel
 Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.  | 
421  | 
self._base_offset = offset  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
422  | 
|
| 
2592.3.176
by Robert Collins
 Various pack refactorings.  | 
423  | 
def __eq__(self, other):  | 
| 
2592.3.215
by Robert Collins
 Review feedback.  | 
424  | 
"""Equal when self and other were created with the same parameters."""  | 
| 
2592.3.176
by Robert Collins
 Various pack refactorings.  | 
425  | 
return (  | 
426  | 
type(self) == type(other) and  | 
|
427  | 
self._transport == other._transport and  | 
|
428  | 
self._name == other._name and  | 
|
429  | 
self._size == other._size)  | 
|
430  | 
||
431  | 
def __ne__(self, other):  | 
|
432  | 
return not self.__eq__(other)  | 
|
433  | 
||
| 
3517.4.13
by Martin Pool
 Add repr methods  | 
434  | 
def __repr__(self):  | 
435  | 
return "%s(%r)" % (self.__class__.__name__,  | 
|
436  | 
self._transport.abspath(self._name))  | 
|
437  | 
||
| 
3665.3.1
by John Arbash Meinel
 Updates to GraphIndex processing.  | 
438  | 
def _buffer_all(self, stream=None):  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
439  | 
"""Buffer all the index data.  | 
440  | 
||
441  | 
        Mutates self._nodes and self.keys_by_offset.
 | 
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
442  | 
        """
 | 
| 
3665.3.1
by John Arbash Meinel
 Updates to GraphIndex processing.  | 
443  | 
if self._nodes is not None:  | 
444  | 
            # We already did this
 | 
|
445  | 
            return
 | 
|
| 
2624.2.15
by Robert Collins
 Add useful -Dindex flag.  | 
446  | 
if 'index' in debug.debug_flags:  | 
447  | 
mutter('Reading entire index %s', self._transport.abspath(self._name))  | 
|
| 
3665.3.1
by John Arbash Meinel
 Updates to GraphIndex processing.  | 
448  | 
if stream is None:  | 
449  | 
stream = self._transport.get(self._name)  | 
|
| 
5074.4.3
by John Arbash Meinel
 Actually implement offset support for GraphIndex.  | 
450  | 
if self._base_offset != 0:  | 
451  | 
                # This is wasteful, but it is better than dealing with
 | 
|
452  | 
                # adjusting all the offsets, etc.
 | 
|
453  | 
stream = StringIO(stream.read()[self._base_offset:])  | 
|
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
454  | 
self._read_prefix(stream)  | 
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
455  | 
self._expected_elements = 3 + self._key_length  | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
456  | 
line_count = 0  | 
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
457  | 
        # raw data keyed by offset
 | 
458  | 
self._keys_by_offset = {}  | 
|
459  | 
        # ready-to-return key:value or key:value, node_ref_lists
 | 
|
460  | 
self._nodes = {}  | 
|
| 
3711.3.13
by John Arbash Meinel
 Shave off another 5s by not building 'node_by_key'  | 
461  | 
self._nodes_by_key = None  | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
462  | 
trailers = 0  | 
463  | 
pos = stream.tell()  | 
|
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
464  | 
lines = stream.read().split('\n')  | 
| 
4852.1.5
by John Arbash Meinel
 Explicitly call stream.close() in the index code.  | 
465  | 
stream.close()  | 
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
466  | 
del lines[-1]  | 
467  | 
_, _, _, trailers = self._parse_lines(lines, pos)  | 
|
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
468  | 
for key, absent, references, value in self._keys_by_offset.itervalues():  | 
| 
2592.1.30
by Robert Collins
 Absent entries are not yeilded.  | 
469  | 
if absent:  | 
470  | 
                continue
 | 
|
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
471  | 
            # resolve references:
 | 
472  | 
if self.node_ref_lists:  | 
|
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
473  | 
node_value = (value, self._resolve_references(references))  | 
| 
2592.1.28
by Robert Collins
 Basic two pass iter_all_entries.  | 
474  | 
else:  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
475  | 
node_value = value  | 
476  | 
self._nodes[key] = node_value  | 
|
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
477  | 
        # cache the keys for quick set intersections
 | 
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
478  | 
if trailers != 1:  | 
479  | 
            # there must be one line - the empty trailer line.
 | 
|
480  | 
raise errors.BadIndexData(self)  | 
|
481  | 
||
| 
4744.2.6
by John Arbash Meinel
 Start exposing an GraphIndex.clear_cache() member.  | 
482  | 
def clear_cache(self):  | 
483  | 
"""Clear out any cached/memoized values.  | 
|
484  | 
||
485  | 
        This can be called at any time, but generally it is used when we have
 | 
|
486  | 
        extracted some information, but don't expect to be requesting any more
 | 
|
487  | 
        from this index.
 | 
|
488  | 
        """
 | 
|
489  | 
||
| 
4011.5.11
by Robert Collins
 Polish the KnitVersionedFiles.scan_unvalidated_index api.  | 
490  | 
def external_references(self, ref_list_num):  | 
| 
4011.5.2
by Andrew Bennetts
 Add more tests, improve existing tests, add GraphIndex._external_references()  | 
491  | 
"""Return references that are not present in this index.  | 
492  | 
        """
 | 
|
493  | 
self._buffer_all()  | 
|
| 
4011.5.3
by Andrew Bennetts
 Implement and test external_references on GraphIndex and BTreeGraphIndex.  | 
494  | 
if ref_list_num + 1 > self.node_ref_lists:  | 
495  | 
raise ValueError('No ref list %d, index has %d ref lists'  | 
|
496  | 
% (ref_list_num, self.node_ref_lists))  | 
|
| 
4011.5.2
by Andrew Bennetts
 Add more tests, improve existing tests, add GraphIndex._external_references()  | 
497  | 
refs = set()  | 
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
498  | 
nodes = self._nodes  | 
499  | 
for key, (value, ref_lists) in nodes.iteritems():  | 
|
| 
4011.5.2
by Andrew Bennetts
 Add more tests, improve existing tests, add GraphIndex._external_references()  | 
500  | 
ref_list = ref_lists[ref_list_num]  | 
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
501  | 
refs.update([ref for ref in ref_list if ref not in nodes])  | 
502  | 
return refs  | 
|
| 
4011.5.2
by Andrew Bennetts
 Add more tests, improve existing tests, add GraphIndex._external_references()  | 
503  | 
|
| 
3711.3.21
by John Arbash Meinel
 Fix GraphIndex to properly generate _nodes_by_keys on demand.  | 
504  | 
def _get_nodes_by_key(self):  | 
505  | 
if self._nodes_by_key is None:  | 
|
506  | 
nodes_by_key = {}  | 
|
507  | 
if self.node_ref_lists:  | 
|
508  | 
for key, (value, references) in self._nodes.iteritems():  | 
|
509  | 
key_dict = nodes_by_key  | 
|
510  | 
for subkey in key[:-1]:  | 
|
511  | 
key_dict = key_dict.setdefault(subkey, {})  | 
|
512  | 
key_dict[key[-1]] = key, value, references  | 
|
513  | 
else:  | 
|
514  | 
for key, value in self._nodes.iteritems():  | 
|
515  | 
key_dict = nodes_by_key  | 
|
516  | 
for subkey in key[:-1]:  | 
|
517  | 
key_dict = key_dict.setdefault(subkey, {})  | 
|
518  | 
key_dict[key[-1]] = key, value  | 
|
519  | 
self._nodes_by_key = nodes_by_key  | 
|
520  | 
return self._nodes_by_key  | 
|
521  | 
||
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
522  | 
def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists). The three-element form is
        used when the index has no reference lists, which keeps the API
        compatible with simple key:value index types. The iteration order
        is not defined - it is whatever is most efficient for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    nodes = self._nodes
    if not self.node_ref_lists:
        for key, value in nodes.iteritems():
            yield self, key, value
        return
    for key, (value, node_ref_lists) in nodes.iteritems():
        yield self, key, value, node_ref_lists
| 
2624.2.2
by Robert Collins
 Temporary performance hack for GraphIndex : load the entire index once and only once into ram.  | 
542  | 
|
| 
2592.1.27
by Robert Collins
 Test missing end lines with non-empty indices.  | 
543  | 
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Checks the format signature, then reads three option lines in order
    and stores their integer values in self.node_ref_lists,
    self._key_length and self._key_count.

    :raises errors.BadIndexFormatSignature: If the signature differs.
    :raises errors.BadIndexOptions: If an option line has the wrong
        prefix or a value that is not an integer.
    """
    expected_signature = self._signature()
    signature = stream.read(len(expected_signature))
    if not signature == expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)

    def read_int_option(prefix):
        # One header line: the prefix, an integer, and a final character
        # (the line terminator) which is sliced off.
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            return int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = read_int_option(_OPTION_NODE_REFS)
    self._key_length = read_int_option(_OPTION_KEY_ELEMENTS)
    self._key_count = read_int_option(_OPTION_LEN)
|
| 
2592.1.5
by Robert Collins
 Trivial index reading.  | 
568  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
569  | 
def _resolve_references(self, references):  | 
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
570  | 
"""Return the resolved key references for references.  | 
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
571  | 
|
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
572  | 
        References are resolved by looking up the location of the key in the
 | 
573  | 
        _keys_by_offset map and substituting the key name, preserving ordering.
 | 
|
574  | 
||
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
575  | 
        :param references: An iterable of iterables of key locations. e.g.
 | 
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
576  | 
            [[123, 456], [123]]
 | 
577  | 
        :return: A tuple of tuples of keys.
 | 
|
578  | 
        """
 | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
579  | 
node_refs = []  | 
580  | 
for ref_list in references:  | 
|
581  | 
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))  | 
|
582  | 
return tuple(node_refs)  | 
|
583  | 
||
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
584  | 
def _find_index(self, range_map, key):  | 
585  | 
"""Helper for the _parsed_*_index calls.  | 
|
586  | 
||
587  | 
        Given a range map - [(start, end), ...], finds the index of the range
 | 
|
588  | 
        in the map for key if it is in the map, and if it is not there, the
 | 
|
589  | 
        immediately preceeding range in the map.
 | 
|
590  | 
        """
 | 
|
591  | 
result = bisect_right(range_map, key) - 1  | 
|
592  | 
if result + 1 < len(range_map):  | 
|
593  | 
            # check the border condition, it may be in result + 1
 | 
|
594  | 
if range_map[result + 1][0] == key[0]:  | 
|
595  | 
return result + 1  | 
|
596  | 
return result  | 
|
597  | 
||
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
598  | 
def _parsed_byte_index(self, offset):  | 
599  | 
"""Return the index of the entry immediately before offset.  | 
|
600  | 
||
601  | 
        e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
 | 
|
602  | 
        there is one unparsed byte (the 11th, addressed as[10]). then:
 | 
|
603  | 
        asking for 0 will return 0
 | 
|
604  | 
        asking for 10 will return 0
 | 
|
605  | 
        asking for 11 will return 1
 | 
|
606  | 
        asking for 12 will return 1
 | 
|
607  | 
        """
 | 
|
608  | 
key = (offset, 0)  | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
609  | 
return self._find_index(self._parsed_byte_map, key)  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
610  | 
|
611  | 
def _parsed_key_index(self, key):  | 
|
612  | 
"""Return the index of the entry immediately before key.  | 
|
613  | 
||
614  | 
        e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
 | 
|
615  | 
        meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
 | 
|
616  | 
        have been parsed, then:
 | 
|
617  | 
        asking for '' will return 0
 | 
|
618  | 
        asking for 'a' will return 0
 | 
|
619  | 
        asking for 'b' will return 1
 | 
|
620  | 
        asking for 'e' will return 1
 | 
|
621  | 
        """
 | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
622  | 
search_key = (key, None)  | 
623  | 
return self._find_index(self._parsed_key_map, search_key)  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
624  | 
|
625  | 
def _is_parsed(self, offset):  | 
|
626  | 
"""Returns True if offset has been parsed."""  | 
|
627  | 
index = self._parsed_byte_index(offset)  | 
|
628  | 
if index == len(self._parsed_byte_map):  | 
|
629  | 
return offset < self._parsed_byte_map[index - 1][1]  | 
|
630  | 
start, end = self._parsed_byte_map[index]  | 
|
631  | 
return offset >= start and offset < end  | 
|
632  | 
||
| 
2890.2.7
by Robert Collins
 * Pack indices are now partially parsed for specific key lookup using a  | 
633  | 
def _iter_entries_from_total_buffer(self, keys):  | 
634  | 
"""Iterate over keys when the entire index is parsed."""  | 
|
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
635  | 
        # Note: See the note in BTreeBuilder.iter_entries for why we don't use
 | 
636  | 
        #       .intersection() here
 | 
|
637  | 
nodes = self._nodes  | 
|
638  | 
keys = [key for key in keys if key in nodes]  | 
|
| 
2624.2.3
by Robert Collins
 Make GraphIndex.iter_entries do hash lookups rather than table scans.  | 
639  | 
if self.node_ref_lists:  | 
640  | 
for key in keys:  | 
|
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
641  | 
value, node_refs = nodes[key]  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
642  | 
yield self, key, value, node_refs  | 
| 
2624.2.3
by Robert Collins
 Make GraphIndex.iter_entries do hash lookups rather than table scans.  | 
643  | 
else:  | 
644  | 
for key in keys:  | 
|
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
645  | 
yield self, key, nodes[key]  | 
| 
2592.1.7
by Robert Collins
 A validate that goes boom.  | 
646  | 
|
| 
2890.2.7
by Robert Collins
 * Pack indices are now partially parsed for specific key lookup using a  | 
647  | 
def iter_entries(self, keys):  | 
648  | 
"""Iterate over keys within the index.  | 
|
649  | 
||
650  | 
        :param keys: An iterable providing the keys to be retrieved.
 | 
|
651  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
|
652  | 
            keys supplied. No additional keys will be returned, and every
 | 
|
653  | 
            key supplied that is in the index will be returned.
 | 
|
654  | 
        """
 | 
|
655  | 
keys = set(keys)  | 
|
656  | 
if not keys:  | 
|
657  | 
return []  | 
|
| 
2890.2.8
by Robert Collins
 Make the size of the index optionally None for the pack-names index.  | 
658  | 
if self._size is None and self._nodes is None:  | 
659  | 
self._buffer_all()  | 
|
| 
3665.3.3
by John Arbash Meinel
 If we read more than 50% of the whole index,  | 
660  | 
|
| 
3606.6.1
by Robert Collins
 Cherry-pick Robert's index buffering.  | 
661  | 
        # We fit about 20 keys per minimum-read (4K), so if we are looking for
 | 
662  | 
        # more than 1/20th of the index its likely (assuming homogenous key
 | 
|
663  | 
        # spread) that we'll read the entire index. If we're going to do that,
 | 
|
664  | 
        # buffer the whole thing. A better analysis might take key spread into
 | 
|
665  | 
        # account - but B+Tree indices are better anyway.
 | 
|
666  | 
        # We could look at all data read, and use a threshold there, which will
 | 
|
667  | 
        # trigger on ancestry walks, but that is not yet fully mapped out.
 | 
|
668  | 
if self._nodes is None and len(keys) * 20 > self.key_count():  | 
|
669  | 
self._buffer_all()  | 
|
| 
2890.2.7
by Robert Collins
 * Pack indices are now partially parsed for specific key lookup using a  | 
670  | 
if self._nodes is not None:  | 
671  | 
return self._iter_entries_from_total_buffer(keys)  | 
|
672  | 
else:  | 
|
673  | 
return (result[1] for result in bisect_multi_bytes(  | 
|
| 
2890.2.18
by Robert Collins
 Review feedback.  | 
674  | 
self._lookup_keys_via_location, self._size, keys))  | 
| 
2890.2.7
by Robert Collins
 * Pack indices are now partially parsed for specific key lookup using a  | 
675  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
676  | 
def iter_entries_prefix(self, keys):  | 
677  | 
"""Iterate over keys within the index using prefix matching.  | 
|
678  | 
||
679  | 
        Prefix matching is applied within the tuple of a key, not to within
 | 
|
680  | 
        the bytestring of each key element. e.g. if you have the keys ('foo',
 | 
|
681  | 
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | 
|
682  | 
        only the former key is returned.
 | 
|
683  | 
||
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
684  | 
        WARNING: Note that this method currently causes a full index parse
 | 
685  | 
        unconditionally (which is reasonably appropriate as it is a means for
 | 
|
686  | 
        thunking many small indices into one larger one and still supplies
 | 
|
687  | 
        iter_all_entries at the thunk layer).
 | 
|
688  | 
||
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
689  | 
        :param keys: An iterable providing the key prefixes to be retrieved.
 | 
690  | 
            Each key prefix takes the form of a tuple the length of a key, but
 | 
|
691  | 
            with the last N elements 'None' rather than a regular bytestring.
 | 
|
692  | 
            The first element cannot be 'None'.
 | 
|
693  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
|
694  | 
            keys with a matching prefix to those supplied. No additional keys
 | 
|
695  | 
            will be returned, and every match that is in the index will be
 | 
|
696  | 
            returned.
 | 
|
697  | 
        """
 | 
|
698  | 
keys = set(keys)  | 
|
699  | 
if not keys:  | 
|
700  | 
            return
 | 
|
701  | 
        # load data - also finds key lengths
 | 
|
702  | 
if self._nodes is None:  | 
|
703  | 
self._buffer_all()  | 
|
704  | 
if self._key_length == 1:  | 
|
705  | 
for key in keys:  | 
|
706  | 
                # sanity check
 | 
|
707  | 
if key[0] is None:  | 
|
708  | 
raise errors.BadIndexKey(key)  | 
|
709  | 
if len(key) != self._key_length:  | 
|
710  | 
raise errors.BadIndexKey(key)  | 
|
711  | 
if self.node_ref_lists:  | 
|
712  | 
value, node_refs = self._nodes[key]  | 
|
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
713  | 
yield self, key, value, node_refs  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
714  | 
else:  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
715  | 
yield self, key, self._nodes[key]  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
716  | 
            return
 | 
| 
3711.3.21
by John Arbash Meinel
 Fix GraphIndex to properly generate _nodes_by_keys on demand.  | 
717  | 
nodes_by_key = self._get_nodes_by_key()  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
718  | 
for key in keys:  | 
719  | 
            # sanity check
 | 
|
720  | 
if key[0] is None:  | 
|
721  | 
raise errors.BadIndexKey(key)  | 
|
722  | 
if len(key) != self._key_length:  | 
|
723  | 
raise errors.BadIndexKey(key)  | 
|
724  | 
            # find what it refers to:
 | 
|
| 
3711.3.21
by John Arbash Meinel
 Fix GraphIndex to properly generate _nodes_by_keys on demand.  | 
725  | 
key_dict = nodes_by_key  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
726  | 
elements = list(key)  | 
| 
2624.2.11
by Robert Collins
 Review comments.  | 
727  | 
            # find the subdict whose contents should be returned.
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
728  | 
try:  | 
729  | 
while len(elements) and elements[0] is not None:  | 
|
730  | 
key_dict = key_dict[elements[0]]  | 
|
731  | 
elements.pop(0)  | 
|
732  | 
except KeyError:  | 
|
733  | 
                # a non-existant lookup.
 | 
|
734  | 
                continue
 | 
|
735  | 
if len(elements):  | 
|
736  | 
dicts = [key_dict]  | 
|
737  | 
while dicts:  | 
|
738  | 
key_dict = dicts.pop(-1)  | 
|
739  | 
                    # can't be empty or would not exist
 | 
|
740  | 
item, value = key_dict.iteritems().next()  | 
|
741  | 
if type(value) == dict:  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
742  | 
                        # push keys
 | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
743  | 
dicts.extend(key_dict.itervalues())  | 
744  | 
else:  | 
|
745  | 
                        # yield keys
 | 
|
746  | 
for value in key_dict.itervalues():  | 
|
| 
2624.2.11
by Robert Collins
 Review comments.  | 
747  | 
                            # each value is the key:value:node refs tuple
 | 
748  | 
                            # ready to yield.
 | 
|
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
749  | 
yield (self, ) + value  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
750  | 
else:  | 
| 
2624.2.11
by Robert Collins
 Review comments.  | 
751  | 
                # the last thing looked up was a terminal element
 | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
752  | 
yield (self, ) + key_dict  | 
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
753  | 
|
| 
4593.4.12
by John Arbash Meinel
 Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()  | 
754  | 
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):  | 
755  | 
"""See BTreeIndex._find_ancestors."""  | 
|
| 
4593.4.7
by John Arbash Meinel
 Basic implementation of a conforming interface for GraphIndex.  | 
756  | 
        # The api can be implemented as a trivial overlay on top of
 | 
757  | 
        # iter_entries, it is not an efficient implementation, but it at least
 | 
|
758  | 
        # gets the job done.
 | 
|
759  | 
found_keys = set()  | 
|
760  | 
search_keys = set()  | 
|
761  | 
for index, key, value, refs in self.iter_entries(keys):  | 
|
762  | 
parent_keys = refs[ref_list_num]  | 
|
763  | 
found_keys.add(key)  | 
|
764  | 
parent_map[key] = parent_keys  | 
|
765  | 
search_keys.update(parent_keys)  | 
|
766  | 
        # Figure out what, if anything, was missing
 | 
|
767  | 
missing_keys.update(set(keys).difference(found_keys))  | 
|
768  | 
search_keys = search_keys.difference(parent_map)  | 
|
769  | 
return search_keys  | 
|
770  | 
||
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
771  | 
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.

    If no count has been recorded yet, the header range is read and parsed
    first, and whatever count is recorded afterwards is returned.
    """
    count = self._key_count
    if count is None:
        # Nothing cached yet: parsing the header range is expected to
        # populate self._key_count.
        self._read_and_parse([_HEADER_READV])
        count = self._key_count
    return count
779  | 
||
| 
2890.2.18
by Robert Collins
 Review feedback.  | 
780  | 
def _lookup_keys_via_location(self, location_keys):  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
781  | 
"""Public interface for implementing bisection.  | 
782  | 
||
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
783  | 
        If _buffer_all has been called, then all the data for the index is in
 | 
784  | 
        memory, and this method should not be called, as it uses a separate
 | 
|
785  | 
        cache because it cannot pre-resolve all indices, which buffer_all does
 | 
|
786  | 
        for performance.
 | 
|
787  | 
||
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
788  | 
        :param location_keys: A list of location(byte offset), key tuples.
 | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
789  | 
        :return: A list of (location_key, result) tuples as expected by
 | 
790  | 
            bzrlib.bisect_multi.bisect_multi_bytes.
 | 
|
791  | 
        """
 | 
|
792  | 
        # Possible improvements:
 | 
|
793  | 
        #  - only bisect lookup each key once
 | 
|
794  | 
        #  - sort the keys first, and use that to reduce the bisection window
 | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
795  | 
        # -----
 | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
796  | 
        # this progresses in three parts:
 | 
797  | 
        # read data
 | 
|
798  | 
        # parse it
 | 
|
799  | 
        # attempt to answer the question from the now in memory data.
 | 
|
800  | 
        # build the readv request
 | 
|
801  | 
        # for each location, ask for 800 bytes - much more than rows we've seen
 | 
|
802  | 
        # anywhere.
 | 
|
803  | 
readv_ranges = []  | 
|
804  | 
for location, key in location_keys:  | 
|
805  | 
            # can we answer from cache?
 | 
|
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
806  | 
if self._bisect_nodes and key in self._bisect_nodes:  | 
807  | 
                # We have the key parsed.
 | 
|
808  | 
                continue
 | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
809  | 
index = self._parsed_key_index(key)  | 
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
810  | 
if (len(self._parsed_key_map) and  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
811  | 
self._parsed_key_map[index][0] <= key and  | 
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
812  | 
(self._parsed_key_map[index][1] >= key or  | 
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
813  | 
                 # end of the file has been parsed
 | 
814  | 
self._parsed_byte_map[index][1] == self._size)):  | 
|
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
815  | 
                # the key has been parsed, so no lookup is needed even if its
 | 
816  | 
                # not present.
 | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
817  | 
                continue
 | 
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
818  | 
            # - if we have examined this part of the file already - yes
 | 
819  | 
index = self._parsed_byte_index(location)  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
820  | 
if (len(self._parsed_byte_map) and  | 
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
821  | 
self._parsed_byte_map[index][0] <= location and  | 
822  | 
self._parsed_byte_map[index][1] > location):  | 
|
823  | 
                # the byte region has been parsed, so no read is needed.
 | 
|
824  | 
                continue
 | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
825  | 
length = 800  | 
826  | 
if location + length > self._size:  | 
|
827  | 
length = self._size - location  | 
|
828  | 
            # todo, trim out parsed locations.
 | 
|
829  | 
if length > 0:  | 
|
830  | 
readv_ranges.append((location, length))  | 
|
831  | 
        # read the header if needed
 | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
832  | 
if self._bisect_nodes is None:  | 
| 
2979.1.1
by Robert Collins
 Use the GraphIndex header to answer key_count queries rather than parsing the entire index unnecessarily.  | 
833  | 
readv_ranges.append(_HEADER_READV)  | 
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
834  | 
self._read_and_parse(readv_ranges)  | 
| 
3665.3.1
by John Arbash Meinel
 Updates to GraphIndex processing.  | 
835  | 
result = []  | 
836  | 
if self._nodes is not None:  | 
|
837  | 
            # _read_and_parse triggered a _buffer_all because we requested the
 | 
|
838  | 
            # whole data range
 | 
|
839  | 
for location, key in location_keys:  | 
|
840  | 
if key not in self._nodes: # not present  | 
|
841  | 
result.append(((location, key), False))  | 
|
842  | 
elif self.node_ref_lists:  | 
|
843  | 
value, refs = self._nodes[key]  | 
|
844  | 
result.append(((location, key),  | 
|
845  | 
(self, key, value, refs)))  | 
|
846  | 
else:  | 
|
847  | 
result.append(((location, key),  | 
|
848  | 
(self, key, self._nodes[key])))  | 
|
849  | 
return result  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
850  | 
        # generate results:
 | 
851  | 
        #  - figure out <, >, missing, present
 | 
|
852  | 
        #  - result present references so we can return them.
 | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
853  | 
        # keys that we cannot answer until we resolve references
 | 
854  | 
pending_references = []  | 
|
855  | 
pending_locations = set()  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
856  | 
for location, key in location_keys:  | 
857  | 
            # can we answer from cache?
 | 
|
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
858  | 
if key in self._bisect_nodes:  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
859  | 
                # the key has been parsed, so no lookup is needed
 | 
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
860  | 
if self.node_ref_lists:  | 
861  | 
                    # the references may not have been all parsed.
 | 
|
862  | 
value, refs = self._bisect_nodes[key]  | 
|
863  | 
wanted_locations = []  | 
|
864  | 
for ref_list in refs:  | 
|
865  | 
for ref in ref_list:  | 
|
866  | 
if ref not in self._keys_by_offset:  | 
|
867  | 
wanted_locations.append(ref)  | 
|
868  | 
if wanted_locations:  | 
|
869  | 
pending_locations.update(wanted_locations)  | 
|
870  | 
pending_references.append((location, key))  | 
|
871  | 
                        continue
 | 
|
872  | 
result.append(((location, key), (self, key,  | 
|
873  | 
value, self._resolve_references(refs))))  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
874  | 
else:  | 
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
875  | 
result.append(((location, key),  | 
876  | 
(self, key, self._bisect_nodes[key])))  | 
|
877  | 
                continue
 | 
|
878  | 
else:  | 
|
879  | 
                # has the region the key should be in, been parsed?
 | 
|
880  | 
index = self._parsed_key_index(key)  | 
|
881  | 
if (self._parsed_key_map[index][0] <= key and  | 
|
882  | 
(self._parsed_key_map[index][1] >= key or  | 
|
883  | 
                     # end of the file has been parsed
 | 
|
884  | 
self._parsed_byte_map[index][1] == self._size)):  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
885  | 
result.append(((location, key), False))  | 
| 
2911.3.1
by Robert Collins
 (robertc) Improve index bisection lookup performance looking for keys in the parsed dict before doing bisection searches in the parsed ranges. (Robert Collins).  | 
886  | 
                    continue
 | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
887  | 
            # no, is the key above or below the probed location:
 | 
888  | 
            # get the range of the probed & parsed location
 | 
|
889  | 
index = self._parsed_byte_index(location)  | 
|
890  | 
            # if the key is below the start of the range, its below
 | 
|
891  | 
if key < self._parsed_key_map[index][0]:  | 
|
892  | 
direction = -1  | 
|
893  | 
else:  | 
|
894  | 
direction = +1  | 
|
895  | 
result.append(((location, key), direction))  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
896  | 
readv_ranges = []  | 
897  | 
        # lookup data to resolve references
 | 
|
898  | 
for location in pending_locations:  | 
|
899  | 
length = 800  | 
|
900  | 
if location + length > self._size:  | 
|
901  | 
length = self._size - location  | 
|
902  | 
            # TODO: trim out parsed locations (e.g. if the 800 is into the
 | 
|
| 
2890.2.16
by Robert Collins
 Review feedback.  | 
903  | 
            # parsed region trim it, and dont use the adjust_for_latency
 | 
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
904  | 
            # facility)
 | 
905  | 
if length > 0:  | 
|
906  | 
readv_ranges.append((location, length))  | 
|
907  | 
self._read_and_parse(readv_ranges)  | 
|
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
908  | 
if self._nodes is not None:  | 
909  | 
            # The _read_and_parse triggered a _buffer_all, grab the data and
 | 
|
910  | 
            # return it
 | 
|
911  | 
for location, key in pending_references:  | 
|
912  | 
value, refs = self._nodes[key]  | 
|
913  | 
result.append(((location, key), (self, key, value, refs)))  | 
|
914  | 
return result  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
915  | 
for location, key in pending_references:  | 
916  | 
            # answer key references we had to look-up-late.
 | 
|
917  | 
value, refs = self._bisect_nodes[key]  | 
|
918  | 
result.append(((location, key), (self, key,  | 
|
919  | 
value, self._resolve_references(refs))))  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
920  | 
return result  | 
921  | 
||
922  | 
def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Checks the signature, then reads the three option lines that follow it
    (node reference list count, key element count, key count) and resets
    the bisection parsing state.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data (which may have length 0).
    :raises errors.BadIndexFormatSignature: if the data does not begin with
        the expected signature.
    :raises errors.BadIndexOptions: if an option line has the wrong prefix
        or a non-integer value.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    lines = bytes[len(expected_signature):].splitlines()
    # Each option line is "<prefix><integer>", stored on the named
    # attribute, in this fixed order.
    option_specs = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for position, (prefix, attribute) in enumerate(option_specs):
        option_line = lines[position]
        if not option_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            parsed = int(option_line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, parsed)
    # calculate the bytes we have processed: the signature, the three
    # option lines, and one line terminator per option line.
    header_end = len(signature) + sum(len(line) for line in lines[:3]) + 3
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]
965  | 
||
966  | 
def _parse_region(self, offset, data):  | 
|
967  | 
"""Parse node data returned from a readv operation.  | 
|
968  | 
||
969  | 
        :param offset: The byte offset the data starts at.
 | 
|
970  | 
        :param data: The data to parse.
 | 
|
971  | 
        """
 | 
|
972  | 
        # trim the data.
 | 
|
973  | 
        # end first:
 | 
|
974  | 
end = offset + len(data)  | 
|
| 
2890.2.15
by Robert Collins
 Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.  | 
975  | 
high_parsed = offset  | 
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
976  | 
while True:  | 
977  | 
            # Trivial test - if the current index's end is within the
 | 
|
978  | 
            # low-matching parsed range, we're done.
 | 
|
| 
2890.2.15
by Robert Collins
 Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.  | 
979  | 
index = self._parsed_byte_index(high_parsed)  | 
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
980  | 
if end < self._parsed_byte_map[index][1]:  | 
981  | 
                return
 | 
|
| 
2890.2.15
by Robert Collins
 Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.  | 
982  | 
            # print "[%d:%d]" % (offset, end), \
 | 
983  | 
            #     self._parsed_byte_map[index:index + 2]
 | 
|
984  | 
high_parsed, last_segment = self._parse_segment(  | 
|
985  | 
offset, data, end, index)  | 
|
986  | 
if last_segment:  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
987  | 
                return
 | 
988  | 
||
989  | 
def _parse_segment(self, offset, data, end, index):  | 
|
990  | 
"""Parse one segment of data.  | 
|
991  | 
||
992  | 
        :param offset: Where 'data' begins in the file.
 | 
|
993  | 
        :param data: Some data to parse a segment of.
 | 
|
994  | 
        :param end: Where data ends
 | 
|
995  | 
        :param index: The current index into the parsed bytes map.
 | 
|
996  | 
        :return: True if the parsed segment is the last possible one in the
 | 
|
997  | 
            range of data.
 | 
|
| 
2890.2.15
by Robert Collins
 Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.  | 
998  | 
        :return: high_parsed_byte, last_segment.
 | 
999  | 
            high_parsed_byte is the location of the highest parsed byte in this
 | 
|
1000  | 
            segment, last_segment is True if the parsed segment is the last
 | 
|
1001  | 
            possible one in the data block.
 | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1002  | 
        """
 | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1003  | 
        # default is to use all data
 | 
1004  | 
trim_end = None  | 
|
1005  | 
        # accomodate overlap with data before this.
 | 
|
1006  | 
if offset < self._parsed_byte_map[index][1]:  | 
|
1007  | 
            # overlaps the lower parsed region
 | 
|
1008  | 
            # skip the parsed data
 | 
|
1009  | 
trim_start = self._parsed_byte_map[index][1] - offset  | 
|
1010  | 
            # don't trim the start for \n
 | 
|
1011  | 
start_adjacent = True  | 
|
1012  | 
elif offset == self._parsed_byte_map[index][1]:  | 
|
1013  | 
            # abuts the lower parsed region
 | 
|
1014  | 
            # use all data
 | 
|
1015  | 
trim_start = None  | 
|
1016  | 
            # do not trim anything
 | 
|
1017  | 
start_adjacent = True  | 
|
1018  | 
else:  | 
|
1019  | 
            # does not overlap the lower parsed region
 | 
|
1020  | 
            # use all data
 | 
|
1021  | 
trim_start = None  | 
|
1022  | 
            # but trim the leading \n
 | 
|
1023  | 
start_adjacent = False  | 
|
1024  | 
if end == self._size:  | 
|
1025  | 
            # lines up to the end of all data:
 | 
|
1026  | 
            # use it all
 | 
|
1027  | 
trim_end = None  | 
|
1028  | 
            # do not strip to the last \n
 | 
|
1029  | 
end_adjacent = True  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1030  | 
last_segment = True  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1031  | 
elif index + 1 == len(self._parsed_byte_map):  | 
1032  | 
            # at the end of the parsed data
 | 
|
1033  | 
            # use it all
 | 
|
1034  | 
trim_end = None  | 
|
1035  | 
            # but strip to the last \n
 | 
|
1036  | 
end_adjacent = False  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1037  | 
last_segment = True  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1038  | 
elif end == self._parsed_byte_map[index + 1][0]:  | 
1039  | 
            # buts up against the next parsed region
 | 
|
1040  | 
            # use it all
 | 
|
1041  | 
trim_end = None  | 
|
1042  | 
            # do not strip to the last \n
 | 
|
1043  | 
end_adjacent = True  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1044  | 
last_segment = True  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1045  | 
elif end > self._parsed_byte_map[index + 1][0]:  | 
1046  | 
            # overlaps into the next parsed region
 | 
|
1047  | 
            # only consider the unparsed data
 | 
|
1048  | 
trim_end = self._parsed_byte_map[index + 1][0] - offset  | 
|
1049  | 
            # do not strip to the last \n as we know its an entire record
 | 
|
1050  | 
end_adjacent = True  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1051  | 
last_segment = end < self._parsed_byte_map[index + 1][1]  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1052  | 
else:  | 
1053  | 
            # does not overlap into the next region
 | 
|
1054  | 
            # use it all
 | 
|
1055  | 
trim_end = None  | 
|
1056  | 
            # but strip to the last \n
 | 
|
1057  | 
end_adjacent = False  | 
|
| 
2890.2.14
by Robert Collins
 Parse more than one segment of data from a single readv response if needed.  | 
1058  | 
last_segment = True  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1059  | 
        # now find bytes to discard if needed
 | 
1060  | 
if not start_adjacent:  | 
|
1061  | 
            # work around python bug in rfind
 | 
|
1062  | 
if trim_start is None:  | 
|
1063  | 
trim_start = data.find('\n') + 1  | 
|
1064  | 
else:  | 
|
1065  | 
trim_start = data.find('\n', trim_start) + 1  | 
|
| 
3376.2.4
by Martin Pool
 Remove every assert statement from bzrlib!  | 
1066  | 
if not (trim_start != 0):  | 
1067  | 
raise AssertionError('no \n was present')  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1068  | 
            # print 'removing start', offset, trim_start, repr(data[:trim_start])
 | 
1069  | 
if not end_adjacent:  | 
|
1070  | 
            # work around python bug in rfind
 | 
|
1071  | 
if trim_end is None:  | 
|
1072  | 
trim_end = data.rfind('\n') + 1  | 
|
1073  | 
else:  | 
|
1074  | 
trim_end = data.rfind('\n', None, trim_end) + 1  | 
|
| 
3376.2.4
by Martin Pool
 Remove every assert statement from bzrlib!  | 
1075  | 
if not (trim_end != 0):  | 
1076  | 
raise AssertionError('no \n was present')  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1077  | 
            # print 'removing end', offset, trim_end, repr(data[trim_end:])
 | 
1078  | 
        # adjust offset and data to the parseable data.
 | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1079  | 
trimmed_data = data[trim_start:trim_end]  | 
| 
3376.2.4
by Martin Pool
 Remove every assert statement from bzrlib!  | 
1080  | 
if not (trimmed_data):  | 
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
1081  | 
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'  | 
| 
3376.2.4
by Martin Pool
 Remove every assert statement from bzrlib!  | 
1082  | 
% (trim_start, trim_end, offset, offset + len(data)))  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1083  | 
if trim_start:  | 
1084  | 
offset += trim_start  | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1085  | 
        # print "parsing", repr(trimmed_data)
 | 
| 
2890.2.10
by Robert Collins
 Add test coverage to ensure \r's are not mangled by bisection parsing.  | 
1086  | 
        # splitlines mangles the \r delimiters.. don't use it.
 | 
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1087  | 
lines = trimmed_data.split('\n')  | 
| 
2890.2.9
by Robert Collins
 Don't use splitlines for index data parsing, we embed \r.  | 
1088  | 
del lines[-1]  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1089  | 
pos = offset  | 
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1090  | 
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)  | 
1091  | 
for key, value in nodes:  | 
|
1092  | 
self._bisect_nodes[key] = value  | 
|
1093  | 
self._parsed_bytes(offset, first_key,  | 
|
1094  | 
offset + len(trimmed_data), last_key)  | 
|
1095  | 
return offset + len(trimmed_data), last_segment  | 
|
1096  | 
||
1097  | 
def _parse_lines(self, lines, pos):  | 
|
1098  | 
key = None  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1099  | 
first_key = None  | 
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1100  | 
trailers = 0  | 
1101  | 
nodes = []  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1102  | 
for line in lines:  | 
1103  | 
if line == '':  | 
|
1104  | 
                # must be at the end
 | 
|
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1105  | 
if self._size:  | 
| 
3376.2.4
by Martin Pool
 Remove every assert statement from bzrlib!  | 
1106  | 
if not (self._size == pos + 1):  | 
1107  | 
raise AssertionError("%s %s" % (self._size, pos))  | 
|
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1108  | 
trailers += 1  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1109  | 
                continue
 | 
1110  | 
elements = line.split('\0')  | 
|
1111  | 
if len(elements) != self._expected_elements:  | 
|
1112  | 
raise errors.BadIndexData(self)  | 
|
| 
3530.3.3
by Robert Collins
 Credit and explanation for interning.  | 
1113  | 
            # keys are tuples. Each element is a string that may occur many
 | 
1114  | 
            # times, so we intern them to save space. AB, RC, 200807
 | 
|
| 
3711.3.13
by John Arbash Meinel
 Shave off another 5s by not building 'node_by_key'  | 
1115  | 
key = tuple([intern(element) for element in elements[:self._key_length]])  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1116  | 
if first_key is None:  | 
1117  | 
first_key = key  | 
|
1118  | 
absent, references, value = elements[-3:]  | 
|
1119  | 
ref_lists = []  | 
|
1120  | 
for ref_string in references.split('\t'):  | 
|
1121  | 
ref_lists.append(tuple([  | 
|
1122  | 
int(ref) for ref in ref_string.split('\r') if ref  | 
|
1123  | 
                    ]))
 | 
|
1124  | 
ref_lists = tuple(ref_lists)  | 
|
1125  | 
self._keys_by_offset[pos] = (key, absent, ref_lists, value)  | 
|
1126  | 
pos += len(line) + 1 # +1 for the \n  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
1127  | 
if absent:  | 
1128  | 
                continue
 | 
|
1129  | 
if self.node_ref_lists:  | 
|
1130  | 
node_value = (value, ref_lists)  | 
|
1131  | 
else:  | 
|
1132  | 
node_value = value  | 
|
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1133  | 
nodes.append((key, node_value))  | 
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
1134  | 
            # print "parsed ", key
 | 
| 
2890.2.17
by Robert Collins
 Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.  | 
1135  | 
return first_key, key, nodes, trailers  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1136  | 
|
1137  | 
def _parsed_bytes(self, start, start_key, end, end_key):  | 
|
1138  | 
"""Mark the bytes from start to end as parsed.  | 
|
1139  | 
||
1140  | 
        Calling self._parsed_bytes(1,2) will mark one byte (the one at offset
 | 
|
1141  | 
        1) as parsed.
 | 
|
1142  | 
||
1143  | 
        :param start: The start of the parsed region.
 | 
|
1144  | 
        :param end: The end of the parsed region.
 | 
|
1145  | 
        """
 | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1146  | 
index = self._parsed_byte_index(start)  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1147  | 
new_value = (start, end)  | 
1148  | 
new_key = (start_key, end_key)  | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1149  | 
if index == -1:  | 
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1150  | 
            # first range parsed is always the beginning.
 | 
1151  | 
self._parsed_byte_map.insert(index, new_value)  | 
|
1152  | 
self._parsed_key_map.insert(index, new_key)  | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1153  | 
            return
 | 
1154  | 
        # four cases:
 | 
|
1155  | 
        # new region
 | 
|
1156  | 
        # extend lower region
 | 
|
1157  | 
        # extend higher region
 | 
|
1158  | 
        # combine two regions
 | 
|
1159  | 
if (index + 1 < len(self._parsed_byte_map) and  | 
|
1160  | 
self._parsed_byte_map[index][1] == start and  | 
|
1161  | 
self._parsed_byte_map[index + 1][0] == end):  | 
|
1162  | 
            # combine two regions
 | 
|
1163  | 
self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],  | 
|
1164  | 
self._parsed_byte_map[index + 1][1])  | 
|
1165  | 
self._parsed_key_map[index] = (self._parsed_key_map[index][0],  | 
|
1166  | 
self._parsed_key_map[index + 1][1])  | 
|
| 
2890.2.12
by Robert Collins
 More index tweaks.  | 
1167  | 
del self._parsed_byte_map[index + 1]  | 
1168  | 
del self._parsed_key_map[index + 1]  | 
|
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1169  | 
elif self._parsed_byte_map[index][1] == start:  | 
1170  | 
            # extend the lower entry
 | 
|
1171  | 
self._parsed_byte_map[index] = (  | 
|
1172  | 
self._parsed_byte_map[index][0], end)  | 
|
1173  | 
self._parsed_key_map[index] = (  | 
|
1174  | 
self._parsed_key_map[index][0], end_key)  | 
|
1175  | 
elif (index + 1 < len(self._parsed_byte_map) and  | 
|
1176  | 
self._parsed_byte_map[index + 1][0] == end):  | 
|
1177  | 
            # extend the higher entry
 | 
|
1178  | 
self._parsed_byte_map[index + 1] = (  | 
|
1179  | 
start, self._parsed_byte_map[index + 1][1])  | 
|
1180  | 
self._parsed_key_map[index + 1] = (  | 
|
1181  | 
start_key, self._parsed_key_map[index + 1][1])  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1182  | 
else:  | 
| 
2890.2.11
by Robert Collins
 Bisection improvements after integrating with packs.  | 
1183  | 
            # new entry
 | 
1184  | 
self._parsed_byte_map.insert(index + 1, new_value)  | 
|
1185  | 
self._parsed_key_map.insert(index + 1, new_key)  | 
|
| 
2890.2.5
by Robert Collins
 Create a content lookup function for bisection in GraphIndex.  | 
1186  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
1187  | 
def _read_and_parse(self, readv_ranges):  | 
| 
4775.1.1
by Martin Pool
 Remove several 'the the' typos  | 
1188  | 
"""Read the ranges and parse the resulting data.  | 
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
1189  | 
|
1190  | 
        :param readv_ranges: A prepared readv range list.
 | 
|
1191  | 
        """
 | 
|
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
1192  | 
if not readv_ranges:  | 
1193  | 
            return
 | 
|
1194  | 
if self._nodes is None and self._bytes_read * 2 >= self._size:  | 
|
1195  | 
            # We've already read more than 50% of the file and we are about to
 | 
|
1196  | 
            # request more data, just _buffer_all() and be done
 | 
|
1197  | 
self._buffer_all()  | 
|
1198  | 
            return
 | 
|
1199  | 
||
| 
5074.4.3
by John Arbash Meinel
 Actually implement offset support for GraphIndex.  | 
1200  | 
base_offset = self._base_offset  | 
1201  | 
if base_offset != 0:  | 
|
1202  | 
            # Rewrite the ranges for the offset
 | 
|
1203  | 
readv_ranges = [(start+base_offset, size)  | 
|
1204  | 
for start, size in readv_ranges]  | 
|
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
1205  | 
readv_data = self._transport.readv(self._name, readv_ranges, True,  | 
| 
5074.4.3
by John Arbash Meinel
 Actually implement offset support for GraphIndex.  | 
1206  | 
self._size + self._base_offset)  | 
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
1207  | 
        # parse
 | 
1208  | 
for offset, data in readv_data:  | 
|
| 
5074.4.3
by John Arbash Meinel
 Actually implement offset support for GraphIndex.  | 
1209  | 
offset -= base_offset  | 
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
1210  | 
self._bytes_read += len(data)  | 
| 
5074.4.3
by John Arbash Meinel
 Actually implement offset support for GraphIndex.  | 
1211  | 
if offset < 0:  | 
1212  | 
                # transport.readv() expanded to extra data which isn't part of
 | 
|
1213  | 
                # this index
 | 
|
1214  | 
data = data[-offset:]  | 
|
1215  | 
offset = 0  | 
|
| 
3665.3.5
by John Arbash Meinel
 Move the point at which we 'buffer_all' if we've read >50% of the index.  | 
1216  | 
if offset == 0 and len(data) == self._size:  | 
1217  | 
                # We read the whole range, most likely because the
 | 
|
1218  | 
                # Transport upcast our readv ranges into one long request
 | 
|
1219  | 
                # for enough total data to grab the whole index.
 | 
|
1220  | 
self._buffer_all(StringIO(data))  | 
|
1221  | 
                return
 | 
|
1222  | 
if self._bisect_nodes is None:  | 
|
1223  | 
                # this must be the start
 | 
|
1224  | 
if not (offset == 0):  | 
|
1225  | 
raise AssertionError()  | 
|
1226  | 
offset, data = self._parse_header_from_bytes(data)  | 
|
1227  | 
            # print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
 | 
|
1228  | 
self._parse_region(offset, data)  | 
|
| 
2890.2.6
by Robert Collins
 Add support for key references to the index lookup_keys_via_location bisection interface.  | 
1229  | 
|
| 
2592.1.8
by Robert Collins
 Empty files should validate ok.  | 
1230  | 
def _signature(self):
    """Return the file signature used by this index type."""
    signature = _SIGNATURE
    return signature
|
1233  | 
||
| 
2592.1.7
by Robert Collins
 A validate that goes boom.  | 
1234  | 
def validate(self):
    """Validate that everything in the index can be accessed.

    iter_all_entries() validates completely at the moment, so running it
    to exhaustion and discarding the entries is all that is needed.
    """
    for _entry in self.iter_all_entries():
        pass
 | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1239  | 
|
1240  | 
||
1241  | 
class CombinedGraphIndex(object):  | 
|
1242  | 
"""A GraphIndex made up from smaller GraphIndices.  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
1243  | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1244  | 
    The backing indices must implement GraphIndex, and are presumed to be
 | 
1245  | 
    static data.
 | 
|
| 
2592.1.45
by Robert Collins
 Tweak documentation as per Aaron's review.  | 
1246  | 
|
1247  | 
    Queries against the combined index will be made against the first index,
 | 
|
1248  | 
    and then the second and so on. The order of index's can thus influence
 | 
|
1249  | 
    performance significantly. For example, if one index is on local disk and a
 | 
|
1250  | 
    second on a remote server, the local disk index should be before the other
 | 
|
1251  | 
    in the index list.
 | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1252  | 
    """
 | 
1253  | 
||
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
1254  | 
def __init__(self, indices, reload_func=None):  | 
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1255  | 
"""Create a CombinedGraphIndex backed by indices.  | 
1256  | 
||
| 
2592.1.45
by Robert Collins
 Tweak documentation as per Aaron's review.  | 
1257  | 
        :param indices: An ordered list of indices to query for data.
 | 
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
1258  | 
        :param reload_func: A function to call if we find we are missing an
 | 
1259  | 
            index. Should have the form reload_func() => True/False to indicate
 | 
|
1260  | 
            if reloading actually changed anything.
 | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1261  | 
        """
 | 
1262  | 
self._indices = indices  | 
|
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
1263  | 
self._reload_func = reload_func  | 
| 
2592.1.37
by Robert Collins
 Add CombinedGraphIndex.insert_index.  | 
1264  | 
|
| 
2592.5.4
by Martin Pool
 Add CombinedGraphIndex repr  | 
1265  | 
def __repr__(self):  | 
1266  | 
return "%s(%s)" % (  | 
|
1267  | 
self.__class__.__name__,  | 
|
1268  | 
', '.join(map(repr, self._indices)))  | 
|
1269  | 
||
| 
4744.2.6
by John Arbash Meinel
 Start exposing an GraphIndex.clear_cache() member.  | 
1270  | 
def clear_cache(self):  | 
1271  | 
"""See GraphIndex.clear_cache()"""  | 
|
1272  | 
for index in self._indices:  | 
|
1273  | 
index.clear_cache()  | 
|
1274  | 
||
| 
3099.3.1
by John Arbash Meinel
 Implement get_parent_map for ParentProviders  | 
1275  | 
def get_parent_map(self, keys):  | 
| 
4379.3.3
by Gary van der Merwe
 Rename and add doc string for StackedParentsProvider.  | 
1276  | 
"""See graph.StackedParentsProvider.get_parent_map"""  | 
| 
3099.3.1
by John Arbash Meinel
 Implement get_parent_map for ParentProviders  | 
1277  | 
search_keys = set(keys)  | 
1278  | 
if NULL_REVISION in search_keys:  | 
|
1279  | 
search_keys.discard(NULL_REVISION)  | 
|
1280  | 
found_parents = {NULL_REVISION:[]}  | 
|
1281  | 
else:  | 
|
1282  | 
found_parents = {}  | 
|
| 
2979.2.2
by Robert Collins
 Per-file graph heads detection during commit for pack repositories.  | 
1283  | 
for index, key, value, refs in self.iter_entries(search_keys):  | 
1284  | 
parents = refs[0]  | 
|
1285  | 
if not parents:  | 
|
1286  | 
parents = (NULL_REVISION,)  | 
|
1287  | 
found_parents[key] = parents  | 
|
| 
3099.3.1
by John Arbash Meinel
 Implement get_parent_map for ParentProviders  | 
1288  | 
return found_parents  | 
| 
2979.2.2
by Robert Collins
 Per-file graph heads detection during commit for pack repositories.  | 
1289  | 
|
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
1290  | 
has_key = _has_key_from_parent_map  | 
| 
3830.3.9
by Martin Pool
 Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests  | 
1291  | 
|
| 
2592.1.37
by Robert Collins
 Add CombinedGraphIndex.insert_index.  | 
1292  | 
def insert_index(self, pos, index):  | 
1293  | 
"""Insert a new index in the list of indices to query.  | 
|
1294  | 
||
1295  | 
        :param pos: The position to insert the index.
 | 
|
1296  | 
        :param index: The index to insert.
 | 
|
1297  | 
        """
 | 
|
1298  | 
self._indices.insert(pos, index)  | 
|
1299  | 
||
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1300  | 
def iter_all_entries(self):  | 
1301  | 
"""Iterate over all keys within the index  | 
|
1302  | 
||
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
1303  | 
        Duplicate keys across child indices are presumed to have the same
 | 
1304  | 
        value and are only reported once.
 | 
|
1305  | 
||
| 
2592.5.1
by Martin Pool
 Fix docstrings for Index.iter_entries etc  | 
1306  | 
        :return: An iterable of (index, key, reference_lists, value).
 | 
1307  | 
            There is no defined order for the result iteration - it will be in
 | 
|
1308  | 
            the most efficient order for the index.
 | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1309  | 
        """
 | 
1310  | 
seen_keys = set()  | 
|
| 
3789.1.5
by John Arbash Meinel
 CombinedGraphIndex.iter_all_entries() can now reload when needed.  | 
1311  | 
while True:  | 
1312  | 
try:  | 
|
1313  | 
for index in self._indices:  | 
|
1314  | 
for node in index.iter_all_entries():  | 
|
1315  | 
if node[1] not in seen_keys:  | 
|
1316  | 
yield node  | 
|
1317  | 
seen_keys.add(node[1])  | 
|
1318  | 
                return
 | 
|
1319  | 
except errors.NoSuchFile:  | 
|
1320  | 
self._reload_or_raise()  | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1321  | 
|
1322  | 
def iter_entries(self, keys):  | 
|
1323  | 
"""Iterate over keys within the index.  | 
|
1324  | 
||
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
1325  | 
        Duplicate keys across child indices are presumed to have the same
 | 
1326  | 
        value and are only reported once.
 | 
|
1327  | 
||
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1328  | 
        :param keys: An iterable providing the keys to be retrieved.
 | 
| 
2592.5.1
by Martin Pool
 Fix docstrings for Index.iter_entries etc  | 
1329  | 
        :return: An iterable of (index, key, reference_lists, value). There is no
 | 
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1330  | 
            defined order for the result iteration - it will be in the most
 | 
1331  | 
            efficient order for the index.
 | 
|
1332  | 
        """
 | 
|
1333  | 
keys = set(keys)  | 
|
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1334  | 
while True:  | 
1335  | 
try:  | 
|
1336  | 
for index in self._indices:  | 
|
1337  | 
if not keys:  | 
|
1338  | 
                        return
 | 
|
1339  | 
for node in index.iter_entries(keys):  | 
|
1340  | 
keys.remove(node[1])  | 
|
1341  | 
yield node  | 
|
| 
2592.1.44
by Robert Collins
 Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.  | 
1342  | 
                return
 | 
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1343  | 
except errors.NoSuchFile:  | 
1344  | 
self._reload_or_raise()  | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1345  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
1346  | 
def iter_entries_prefix(self, keys):  | 
1347  | 
"""Iterate over keys within the index using prefix matching.  | 
|
1348  | 
||
1349  | 
        Duplicate keys across child indices are presumed to have the same
 | 
|
1350  | 
        value and are only reported once.
 | 
|
1351  | 
||
1352  | 
        Prefix matching is applied within the tuple of a key, not to within
 | 
|
1353  | 
        the bytestring of each key element. e.g. if you have the keys ('foo',
 | 
|
1354  | 
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | 
|
1355  | 
        only the former key is returned.
 | 
|
1356  | 
||
1357  | 
        :param keys: An iterable providing the key prefixes to be retrieved.
 | 
|
1358  | 
            Each key prefix takes the form of a tuple the length of a key, but
 | 
|
1359  | 
            with the last N elements 'None' rather than a regular bytestring.
 | 
|
1360  | 
            The first element cannot be 'None'.
 | 
|
1361  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
|
1362  | 
            keys with a matching prefix to those supplied. No additional keys
 | 
|
1363  | 
            will be returned, and every match that is in the index will be
 | 
|
1364  | 
            returned.
 | 
|
1365  | 
        """
 | 
|
1366  | 
keys = set(keys)  | 
|
1367  | 
if not keys:  | 
|
1368  | 
            return
 | 
|
1369  | 
seen_keys = set()  | 
|
| 
3789.1.6
by John Arbash Meinel
 CombinedGraphIndex.iter_entries_prefix can now reload when needed.  | 
1370  | 
while True:  | 
1371  | 
try:  | 
|
1372  | 
for index in self._indices:  | 
|
1373  | 
for node in index.iter_entries_prefix(keys):  | 
|
1374  | 
if node[1] in seen_keys:  | 
|
1375  | 
                            continue
 | 
|
1376  | 
seen_keys.add(node[1])  | 
|
1377  | 
yield node  | 
|
1378  | 
                return
 | 
|
1379  | 
except errors.NoSuchFile:  | 
|
1380  | 
self._reload_or_raise()  | 
|
| 
2624.2.9
by Robert Collins
 Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.  | 
1381  | 
|
| 
4593.4.12
by John Arbash Meinel
 Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()  | 
1382  | 
def find_ancestry(self, keys, ref_list_num):  | 
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1383  | 
"""Find the complete ancestry for the given set of keys.  | 
1384  | 
||
| 
4593.4.12
by John Arbash Meinel
 Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()  | 
1385  | 
        Note that this is a whole-ancestry request, so it should be used
 | 
1386  | 
        sparingly.
 | 
|
1387  | 
||
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1388  | 
        :param keys: An iterable of keys to look for
 | 
| 
4593.4.12
by John Arbash Meinel
 Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()  | 
1389  | 
        :param ref_list_num: The reference list which references the parents
 | 
1390  | 
            we care about.
 | 
|
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1391  | 
        :return: (parent_map, missing_keys)
 | 
1392  | 
        """
 | 
|
1393  | 
missing_keys = set()  | 
|
1394  | 
parent_map = {}  | 
|
1395  | 
keys_to_lookup = set(keys)  | 
|
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1396  | 
generation = 0  | 
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1397  | 
while keys_to_lookup:  | 
1398  | 
            # keys that *all* indexes claim are missing, stop searching them
 | 
|
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1399  | 
generation += 1  | 
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1400  | 
all_index_missing = None  | 
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1401  | 
            # print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
 | 
1402  | 
            # print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
 | 
|
1403  | 
            #                                   len(parent_map),
 | 
|
1404  | 
            #                                   len(missing_keys))
 | 
|
1405  | 
for index_idx, index in enumerate(self._indices):  | 
|
1406  | 
                # TODO: we should probably be doing something with
 | 
|
1407  | 
                #       'missing_keys' since we've already determined that
 | 
|
1408  | 
                #       those revisions have not been found anywhere
 | 
|
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1409  | 
index_missing_keys = set()  | 
1410  | 
                # Find all of the ancestry we can from this index
 | 
|
1411  | 
                # keep looking until the search_keys set is empty, which means
 | 
|
1412  | 
                # things we didn't find should be in index_missing_keys
 | 
|
1413  | 
search_keys = keys_to_lookup  | 
|
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1414  | 
sub_generation = 0  | 
1415  | 
                # print '    \t%2d\t\t%4d\t%5d\t%5d' % (
 | 
|
1416  | 
                #     index_idx, len(search_keys),
 | 
|
1417  | 
                #     len(parent_map), len(index_missing_keys))
 | 
|
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1418  | 
while search_keys:  | 
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1419  | 
sub_generation += 1  | 
1420  | 
                    # TODO: ref_list_num should really be a parameter, since
 | 
|
1421  | 
                    #       CombinedGraphIndex does not know what the ref lists
 | 
|
1422  | 
                    #       mean.
 | 
|
| 
4593.4.12
by John Arbash Meinel
 Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()  | 
1423  | 
search_keys = index._find_ancestors(search_keys,  | 
1424  | 
ref_list_num, parent_map, index_missing_keys)  | 
|
| 
4593.4.9
by John Arbash Meinel
 Add some debugging statements for now.  | 
1425  | 
                    # print '    \t  \t%2d\t%4d\t%5d\t%5d' % (
 | 
1426  | 
                    #     sub_generation, len(search_keys),
 | 
|
1427  | 
                    #     len(parent_map), len(index_missing_keys))
 | 
|
| 
4593.4.8
by John Arbash Meinel
 Implement CombinedGraphIndex.get_ancestry()  | 
1428  | 
                # Now set whatever was missing to be searched in the next index
 | 
1429  | 
keys_to_lookup = index_missing_keys  | 
|
1430  | 
if all_index_missing is None:  | 
|
1431  | 
all_index_missing = set(index_missing_keys)  | 
|
1432  | 
else:  | 
|
1433  | 
all_index_missing.intersection_update(index_missing_keys)  | 
|
1434  | 
if not keys_to_lookup:  | 
|
1435  | 
                    break
 | 
|
1436  | 
if all_index_missing is None:  | 
|
1437  | 
                # There were no indexes, so all search keys are 'missing'
 | 
|
1438  | 
missing_keys.update(keys_to_lookup)  | 
|
1439  | 
keys_to_lookup = None  | 
|
1440  | 
else:  | 
|
1441  | 
missing_keys.update(all_index_missing)  | 
|
1442  | 
keys_to_lookup.difference_update(all_index_missing)  | 
|
1443  | 
return parent_map, missing_keys  | 
|
1444  | 
||
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
1445  | 
def key_count(self):  | 
1446  | 
"""Return an estimate of the number of keys in this index.  | 
|
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
1447  | 
|
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
1448  | 
        For CombinedGraphIndex this is approximated by the sum of the keys of
 | 
1449  | 
        the child indices. As child indices may have duplicate keys this can
 | 
|
1450  | 
        have a maximum error of the number of child indices * largest number of
 | 
|
1451  | 
        keys in any index.
 | 
|
1452  | 
        """
 | 
|
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1453  | 
while True:  | 
| 
3789.1.3
by John Arbash Meinel
 CombinedGraphIndex can now reload when calling key_count().  | 
1454  | 
try:  | 
1455  | 
return sum((index.key_count() for index in self._indices), 0)  | 
|
1456  | 
except errors.NoSuchFile:  | 
|
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1457  | 
self._reload_or_raise()  | 
1458  | 
||
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
1459  | 
missing_keys = _missing_keys_from_parent_map  | 
1460  | 
||
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1461  | 
def _reload_or_raise(self):  | 
1462  | 
"""We just got a NoSuchFile exception.  | 
|
1463  | 
||
1464  | 
        Try to reload the indices, if it fails, just raise the current
 | 
|
1465  | 
        exception.
 | 
|
1466  | 
        """
 | 
|
1467  | 
if self._reload_func is None:  | 
|
1468  | 
            raise
 | 
|
1469  | 
exc_type, exc_value, exc_traceback = sys.exc_info()  | 
|
| 
3789.1.10
by John Arbash Meinel
 Review comments from Martin.  | 
1470  | 
trace.mutter('Trying to reload after getting exception: %s',  | 
1471  | 
exc_value)  | 
|
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1472  | 
if not self._reload_func():  | 
1473  | 
            # We tried to reload, but nothing changed, so we fail anyway
 | 
|
| 
3789.1.10
by John Arbash Meinel
 Review comments from Martin.  | 
1474  | 
trace.mutter('_reload_func indicated nothing has changed.'  | 
1475  | 
' Raising original exception.')  | 
|
| 
3789.1.4
by John Arbash Meinel
 CombinedGraphIndex.iter_entries() is now able to reload on request.  | 
1476  | 
raise exc_type, exc_value, exc_traceback  | 
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
1477  | 
|
| 
2592.1.31
by Robert Collins
 Build a combined graph index to use multiple indices at once.  | 
1478  | 
def validate(self):  | 
1479  | 
"""Validate that everything in the index can be accessed."""  | 
|
| 
3789.1.7
by John Arbash Meinel
 CombinedGraphIndex.validate() will now reload.  | 
1480  | 
while True:  | 
1481  | 
try:  | 
|
1482  | 
for index in self._indices:  | 
|
1483  | 
index.validate()  | 
|
1484  | 
                return
 | 
|
1485  | 
except errors.NoSuchFile:  | 
|
1486  | 
self._reload_or_raise()  | 
|
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1487  | 
|
1488  | 
||
1489  | 
class InMemoryGraphIndex(GraphIndexBuilder):  | 
|
1490  | 
"""A GraphIndex which operates entirely out of memory and is mutable.  | 
|
1491  | 
||
1492  | 
    This is designed to allow the accumulation of GraphIndex entries during a
 | 
|
1493  | 
    single write operation, where the accumulated entries need to be immediately
 | 
|
1494  | 
    available - for example via a CombinedGraphIndex.
 | 
|
1495  | 
    """
 | 
|
1496  | 
||
1497  | 
def add_nodes(self, nodes):  | 
|
1498  | 
"""Add nodes to the index.  | 
|
1499  | 
||
1500  | 
        :param nodes: An iterable of (key, node_refs, value) entries to add.
 | 
|
1501  | 
        """
 | 
|
| 
2592.3.39
by Robert Collins
 Fugly version to remove signatures.kndx  | 
1502  | 
if self.reference_lists:  | 
1503  | 
for (key, value, node_refs) in nodes:  | 
|
1504  | 
self.add_node(key, value, node_refs)  | 
|
1505  | 
else:  | 
|
1506  | 
for (key, value) in nodes:  | 
|
1507  | 
self.add_node(key, value)  | 
|
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1508  | 
|
1509  | 
def iter_all_entries(self):  | 
|
1510  | 
"""Iterate over all keys within the index  | 
|
1511  | 
||
| 
2592.5.1
by Martin Pool
 Fix docstrings for Index.iter_entries etc  | 
1512  | 
        :return: An iterable of (index, key, reference_lists, value). There is no
 | 
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1513  | 
            defined order for the result iteration - it will be in the most
 | 
1514  | 
            efficient order for the index (in this case dictionary hash order).
 | 
|
1515  | 
        """
 | 
|
| 
2745.1.1
by Robert Collins
 Add a number of -Devil checkpoints.  | 
1516  | 
if 'evil' in debug.debug_flags:  | 
| 
2592.3.112
by Robert Collins
 Various fixups found dogfooding.  | 
1517  | 
trace.mutter_callsite(3,  | 
| 
2745.1.2
by Robert Collins
 Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.  | 
1518  | 
"iter_all_entries scales with size of history.")  | 
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1519  | 
if self.reference_lists:  | 
1520  | 
for key, (absent, references, value) in self._nodes.iteritems():  | 
|
1521  | 
if not absent:  | 
|
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1522  | 
yield self, key, value, references  | 
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1523  | 
else:  | 
1524  | 
for key, (absent, references, value) in self._nodes.iteritems():  | 
|
1525  | 
if not absent:  | 
|
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1526  | 
yield self, key, value  | 
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1527  | 
|
1528  | 
def iter_entries(self, keys):  | 
|
1529  | 
"""Iterate over keys within the index.  | 
|
1530  | 
||
1531  | 
        :param keys: An iterable providing the keys to be retrieved.
 | 
|
| 
2979.2.4
by Robert Collins
 Docstring fixes from review.  | 
1532  | 
        :return: An iterable of (index, key, value, reference_lists). There is no
 | 
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1533  | 
            defined order for the result iteration - it will be in the most
 | 
1534  | 
            efficient order for the index (keys iteration order in this case).
 | 
|
1535  | 
        """
 | 
|
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
1536  | 
        # Note: See BTreeBuilder.iter_entries for an explanation of why we
 | 
1537  | 
        #       aren't using set().intersection() here
 | 
|
1538  | 
nodes = self._nodes  | 
|
1539  | 
keys = [key for key in keys if key in nodes]  | 
|
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1540  | 
if self.reference_lists:  | 
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
1541  | 
for key in keys:  | 
1542  | 
node = nodes[key]  | 
|
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1543  | 
if not node[0]:  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1544  | 
yield self, key, node[2], node[1]  | 
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1545  | 
else:  | 
| 
4789.28.2
by John Arbash Meinel
 Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.  | 
1546  | 
for key in keys:  | 
1547  | 
node = nodes[key]  | 
|
| 
2592.1.46
by Robert Collins
 Make GraphIndex accept nodes as key, value, references, so that the method  | 
1548  | 
if not node[0]:  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1549  | 
yield self, key, node[2]  | 
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1550  | 
|
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1551  | 
def iter_entries_prefix(self, keys):  | 
1552  | 
"""Iterate over keys within the index using prefix matching.  | 
|
1553  | 
||
1554  | 
        Prefix matching is applied within the tuple of a key, not to within
 | 
|
1555  | 
        the bytestring of each key element. e.g. if you have the keys ('foo',
 | 
|
1556  | 
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | 
|
1557  | 
        only the former key is returned.
 | 
|
1558  | 
||
1559  | 
        :param keys: An iterable providing the key prefixes to be retrieved.
 | 
|
1560  | 
            Each key prefix takes the form of a tuple the length of a key, but
 | 
|
1561  | 
            with the last N elements 'None' rather than a regular bytestring.
 | 
|
1562  | 
            The first element cannot be 'None'.
 | 
|
1563  | 
        :return: An iterable as per iter_all_entries, but restricted to the
 | 
|
1564  | 
            keys with a matching prefix to those supplied. No additional keys
 | 
|
1565  | 
            will be returned, and every match that is in the index will be
 | 
|
1566  | 
            returned.
 | 
|
1567  | 
        """
 | 
|
1568  | 
        # XXX: To much duplication with the GraphIndex class; consider finding
 | 
|
1569  | 
        # a good place to pull out the actual common logic.
 | 
|
1570  | 
keys = set(keys)  | 
|
1571  | 
if not keys:  | 
|
1572  | 
            return
 | 
|
1573  | 
if self._key_length == 1:  | 
|
1574  | 
for key in keys:  | 
|
1575  | 
                # sanity check
 | 
|
1576  | 
if key[0] is None:  | 
|
1577  | 
raise errors.BadIndexKey(key)  | 
|
1578  | 
if len(key) != self._key_length:  | 
|
1579  | 
raise errors.BadIndexKey(key)  | 
|
1580  | 
node = self._nodes[key]  | 
|
1581  | 
if node[0]:  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
1582  | 
                    continue
 | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1583  | 
if self.reference_lists:  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1584  | 
yield self, key, node[2], node[1]  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1585  | 
else:  | 
| 
2624.2.17
by Robert Collins
 Review feedback.  | 
1586  | 
yield self, key, node[2]  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1587  | 
            return
 | 
| 
3644.2.4
by John Arbash Meinel
 Change GraphIndex to also have a _get_nodes_by_key  | 
1588  | 
nodes_by_key = self._get_nodes_by_key()  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1589  | 
for key in keys:  | 
1590  | 
            # sanity check
 | 
|
1591  | 
if key[0] is None:  | 
|
1592  | 
raise errors.BadIndexKey(key)  | 
|
1593  | 
if len(key) != self._key_length:  | 
|
1594  | 
raise errors.BadIndexKey(key)  | 
|
1595  | 
            # find what it refers to:
 | 
|
| 
3644.2.4
by John Arbash Meinel
 Change GraphIndex to also have a _get_nodes_by_key  | 
1596  | 
key_dict = nodes_by_key  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1597  | 
elements = list(key)  | 
1598  | 
            # find the subdict to return
 | 
|
1599  | 
try:  | 
|
1600  | 
while len(elements) and elements[0] is not None:  | 
|
1601  | 
key_dict = key_dict[elements[0]]  | 
|
1602  | 
elements.pop(0)  | 
|
1603  | 
except KeyError:  | 
|
1604  | 
                # a non-existant lookup.
 | 
|
1605  | 
                continue
 | 
|
1606  | 
if len(elements):  | 
|
1607  | 
dicts = [key_dict]  | 
|
1608  | 
while dicts:  | 
|
1609  | 
key_dict = dicts.pop(-1)  | 
|
1610  | 
                    # can't be empty or would not exist
 | 
|
1611  | 
item, value = key_dict.iteritems().next()  | 
|
1612  | 
if type(value) == dict:  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
1613  | 
                        # push keys
 | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1614  | 
dicts.extend(key_dict.itervalues())  | 
1615  | 
else:  | 
|
1616  | 
                        # yield keys
 | 
|
1617  | 
for value in key_dict.itervalues():  | 
|
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1618  | 
yield (self, ) + value  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1619  | 
else:  | 
| 
2624.2.14
by Robert Collins
 Add source index to the index iteration API to allow mapping back to the origin of retrieved data.  | 
1620  | 
yield (self, ) + key_dict  | 
| 
2624.2.10
by Robert Collins
 Also add iter_key_prefix support to InMemoryGraphIndex.  | 
1621  | 
|
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
1622  | 
def key_count(self):
    """Return the number of keys held by this index.

    The generic index API only promises an estimate, but for
    InMemoryGraphIndex the figure is exact: every stored node is counted
    except those recorded as absent.
    """
    stored = len(self._nodes)
    absent = len(self._absent_keys)
    return stored - absent
| 
2624.2.16
by Robert Collins
 Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.  | 
1628  | 
|
| 
2592.1.38
by Robert Collins
 Create an InMemoryGraphIndex for temporary indexing.  | 
1629  | 
def validate(self):
    """Validate the index; a no-op for in-memory indices.

    In-memory indices have no known corruption modes at the moment, so
    there is nothing to check.
    """
    return None
|
| 
2624.2.12
by Robert Collins
 Create an adapter between indices with differing key lengths.  | 
1631  | 
|
1632  | 
||
1633  | 
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index to forward queries to.
        :param prefix: A tuple of key elements prepended to every key and
            reference sent to adapted, and stripped from everything it
            returns.
        :param missing_key_length: The number of None elements appended to
            prefix to build the prefix search key used by iter_all_entries.
        :param add_nodes_callback: Optional callable that receives the list
            of prefixed nodes built by add_nodes/add_node.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no reference lists.
            All entries must have the same shape.
        :raises TypeError: if no add_nodes_callback was supplied (the
            default of None is not callable).
        """
        # save nodes in case its an iterator: a failed three-element unpack
        # must be able to restart from the first entry.
        nodes = tuple(nodes)
        prefix = self.prefix
        try:
            # node_refs is a tuple of tuples of keys, so split it apart and
            # add the prefix to every referenced key as well as the key.
            translated_nodes = [
                (prefix + key, value,
                 tuple(tuple(prefix + ref_node for ref_node in ref_list)
                       for ref_list in node_refs))
                for (key, value, node_refs) in nodes]
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference
            # list status, to handle this bit of user-friendliness in the API
            # more explicitly.
            # The entries did not unpack as triples; treat them as
            # (key, value) pairs without references.
            translated_nodes = [
                (prefix + key, value) for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of (index, key, value) or
            (index, key, value, reference_lists) entries as produced by the
            adapted index.
        :return: A generator of entries of the same shape, with the prefix
            removed from the key and from every referenced key. The index
            and value elements are passed through unchanged.
        :raises errors.BadIndexData: if a key or a referenced key does not
            start with the prefix.
        """
        prefix = self.prefix
        prefix_len = self.prefix_len
        for node in an_iter:
            # cross checks
            if node[1][:prefix_len] != prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][prefix_len:]
            if len(node) < 4:
                # Entry without a reference_lists element: only the key
                # needs adjusting.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:prefix_len] != prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - entries come back
            in whatever order the adapted index yields them.
        """
        return self._strip_prefix(
            self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count lazily rather than materialising every entry in a list.
        return sum(1 for _ in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()