bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 4763.2.4
by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry. | 1 | # Copyright (C) 2007-2010 Canonical Ltd
 | 
| 2592.1.4
by Robert Collins Create a GraphIndexBuilder. | 2 | #
 | 
| 3 | # This program is free software; you can redistribute it and/or modify
 | |
| 4 | # it under the terms of the GNU General Public License as published by
 | |
| 5 | # the Free Software Foundation; either version 2 of the License, or
 | |
| 6 | # (at your option) any later version.
 | |
| 7 | #
 | |
| 8 | # This program is distributed in the hope that it will be useful,
 | |
| 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| 11 | # GNU General Public License for more details.
 | |
| 12 | #
 | |
| 13 | # You should have received a copy of the GNU General Public License
 | |
| 14 | # along with this program; if not, write to the Free Software
 | |
| 4183.7.1
by Sabin Iacob update FSF mailing address | 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 2592.1.4
by Robert Collins Create a GraphIndexBuilder. | 16 | |
| 17 | """Indexing facilities."""
 | |
| 18 | ||
# Public names of this module; this is what a star-import exposes.
__all__ = [
    "CombinedGraphIndex",
    "GraphIndex",
    "GraphIndexBuilder",
    "GraphIndexPrefixAdapter",
    "InMemoryGraphIndex",
    ]
 | |
| 2592.1.32
by Robert Collins Add __all__ to index. | 26 | |
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 27 | from bisect import bisect_right | 
| 2592.1.4
by Robert Collins Create a GraphIndexBuilder. | 28 | from cStringIO import StringIO | 
| 2592.1.12
by Robert Collins Handle basic node adds. | 29 | import re | 
| 3789.1.3
by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count(). | 30 | import sys | 
| 2592.1.4
by Robert Collins Create a GraphIndexBuilder. | 31 | |
| 2624.2.15
by Robert Collins Add useful -Dindex flag. | 32 | from bzrlib.lazy_import import lazy_import | 
| 33 | lazy_import(globals(), """ | |
| 2745.1.2
by Robert Collins Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. | 34 | from bzrlib import trace
 | 
| 2890.2.7
by Robert Collins * Pack indices are now partially parsed for specific key lookup using a | 35 | from bzrlib.bisect_multi import bisect_multi_bytes
 | 
| 2979.2.2
by Robert Collins Per-file graph heads detection during commit for pack repositories. | 36 | from bzrlib.revision import NULL_REVISION
 | 
| 2745.1.2
by Robert Collins Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly. | 37 | from bzrlib.trace import mutter
 | 
| 2624.2.15
by Robert Collins Add useful -Dindex flag. | 38 | """) | 
| 3099.3.3
by John Arbash Meinel Deprecate get_parents() in favor of get_parent_map() | 39 | from bzrlib import ( | 
| 40 | debug, | |
| 41 | errors, | |
| 42 |     )
 | |
| 4679.8.3
by John Arbash Meinel Expose bzrlib.static_tuple.StaticTuple as a thunk | 43 | from bzrlib.static_tuple import StaticTuple | 
| 2592.1.4
by Robert Collins Create a GraphIndexBuilder. | 44 | |
# An (offset, length) pair; presumably the readv request used to fetch just
# the index header -- its use is outside this section, confirm there.
_HEADER_READV = (0, 200)
# Prefixes of the option lines that GraphIndexBuilder.finish() writes directly
# after the signature line.
_OPTION_KEY_ELEMENTS = 'key_elements='
_OPTION_LEN = 'len='
_OPTION_NODE_REFS = 'node_ref_lists='
# First line of a serialised index.
_SIGNATURE = 'Bazaar Graph Index 1\n'


# Matches any character that is not allowed inside a key element.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches the characters that are not allowed inside a node value.
_newline_null_re = re.compile('[\n\0]')
| 54 | ||
| 55 | ||
def _has_key_from_parent_map(self, key):
    """Tell whether a single key is present in this index.

    The answer comes from a one-key get_parent_map call. A caller with
    several keys to test should call get_parent_map once with all of them
    instead, which should be faster.

    :param key: The key to look for.
    :return: True if get_parent_map reports the key, False otherwise.
    """
    found = self.get_parent_map([key])
    return key in found
| 63 | ||
| 3830.3.20
by John Arbash Meinel Minor PEP8 and copyright updates. | 64 | |
def _missing_keys_from_parent_map(self, keys):
    """Return the requested keys that get_parent_map has no entry for.

    :param keys: An iterable of keys to look up.
    :return: A set holding every key from keys that is absent from the
        result of self.get_parent_map(keys).
    """
    # Snapshot the request before the lookup, in the same order as always.
    wanted = set(keys)
    present = self.get_parent_map(keys)
    return wanted.difference(present)
| 67 | ||
| 68 | ||
class GraphIndexBuilder(object):
    """Collect nodes in memory and serialise them as a GraphIndex.

    Nodes are supplied through add_node() and written out by finish(), which
    emits text with this layout:

    INDEX          := _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
                      'key_elements=' DIGITS NEWLINE
                      'len=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := ELEMENT (NULL ELEMENT){key_elements - 1}
    ELEMENT        := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; zero padded byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: How many reference lists every node carries.
        :param key_elements: How many bytestrings make up each key.
        """
        self.reference_lists = reference_lists
        self._key_length = key_elements
        # {key: (absent, ref_lists, value)}. absent is 'a' for a key that has
        # only been referenced and '' for a key that was really added.
        self._nodes = {}
        # Keys that some node references but that were never added themselves.
        self._absent_keys = set()
        # Nested dict view of the nodes; only built when first asked for.
        self._nodes_by_key = None
        self._optimize_for_size = False
        self._combine_backing_indices = True

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index.

        A valid key is exactly a tuple or StaticTuple (subclasses are not
        accepted), has the configured number of elements, and every element
        is non-empty and free of whitespace and NUL characters.
        """
        if (type(key) not in (tuple, StaticTuple)
            or len(key) != self._key_length):
            raise errors.BadIndexKey(key)
        for part in key:
            if part and _whitespace_re.search(part) is None:
                continue
            raise errors.BadIndexKey(part)

    def _external_references(self):
        """Return the references whose keys are not nodes of this index.

        Walks self.iter_all_entries(), which is not defined in the class as
        shown here and is presumably supplied by a subclass.
        """
        # TODO: JAM 2008-11-21 This hard-codes how the reference lists are
        #       used: pack-0.92 through 1.9 keep the compression parent in
        #       the second reference list of the node references (3rd
        #       column). That knowledge probably belongs higher up the stack
        #       rather than in the index.
        if self.reference_lists <= 1:
            # With no reference lists nothing can be external, and with just
            # one there is nowhere to record a compression parent.
            return set()
        present = set()
        referenced = set()
        for entry in self.iter_all_entries():
            present.add(entry[1])
            referenced.update(entry[3][1])
        return referenced - present

    def _get_nodes_by_key(self):
        """Return the nested-dict view of the nodes, building it if needed.

        A key (foo, bar, baz) is reachable as result[foo][bar][baz]. The leaf
        is (key, value, references) when the index has reference lists and
        (key, value) otherwise. Absent nodes are left out.
        """
        if self._nodes_by_key is not None:
            return self._nodes_by_key
        with_refs = bool(self.reference_lists)
        # Built on the side so a failure part way leaves the cache unset.
        tree = {}
        for key, (absent, references, value) in self._nodes.iteritems():
            if absent:
                continue
            branch = tree
            for subkey in key[:-1]:
                branch = branch.setdefault(subkey, {})
            if with_refs:
                branch[key[-1]] = key, value, references
            else:
                branch[key[-1]] = key, value
        self._nodes_by_key = tree
        return tree

    def _update_nodes_by_key(self, key, value, node_refs):
        """Record a newly added key in the _nodes_by_key dict.

        For a key of (foo, bar, baz) this sets
        _nodes_by_key[foo][bar][baz] = key_value

        Nothing is done while the nested view has not been built yet.
        """
        branch = self._nodes_by_key
        if branch is None:
            return
        if self.reference_lists:
            key_value = StaticTuple(key, value, node_refs)
        else:
            key_value = StaticTuple(key, value)
        for subkey in key[:-1]:
            branch = branch.setdefault(subkey, {})
        branch[key[-1]] = key_value

    def _check_key_ref_value(self, key, references, value):
        """Validate a node about to be added and pack its references.

        :param key: A key tuple. It has to satisfy _check_key: a tuple of the
            right length with no whitespace or nulls in any element.
        :param references: An iterable of reference lists. Something like
            [[(ref, key)], [(ref, key), (other, key)]]
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)
            node_refs   the same content as 'references' with every
                        iterable turned into a StaticTuple
            absent_references   reference keys that are not in self._nodes.
                                A key referenced from several lists shows up
                                once per occurrence.
        """
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        to_static = StaticTuple.from_sequence
        packed_lists = []
        absent_references = []
        for ref_list in references:
            for candidate in ref_list:
                # Anything already in self._nodes was validated when it got
                # there, so only unknown keys need checking.
                if candidate in self._nodes:
                    continue
                self._check_key(candidate)
                absent_references.append(candidate)
            packed_lists.append(
                to_static([to_static(ref).intern() for ref in ref_list]))
        return to_static(packed_lists), absent_references

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \0 or \n.
        :raises errors.BadIndexDuplicateKey: If key was already added as a
            real (non-absent) node.
        """
        node_refs, absent_references = self._check_key_ref_value(
            key, references, value)
        existing = self._nodes.get(key)
        if existing is not None and existing[0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
        # Give every unknown reference a placeholder node. Duplicates simply
        # overwrite each other, which is harmless.
        placeholder = ('a', (), '')
        for missing in absent_references:
            self._nodes[missing] = placeholder
        self._absent_keys.update(absent_references)
        # The key being added is real from now on, even if it was only a
        # placeholder before (or references itself).
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._key_length > 1 and self._nodes_by_key is not None:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        Does nothing here; the method exists so that builders offer the same
        api as the generic 'Index' abstraction.
        """

    def finish(self):
        """Serialise every node held by the builder.

        :return: A StringIO wrapping the serialised index text.
        :raises errors.BzrError: If reference offsets were pre-computed and
            the text produced is not of the predicted length.
        """
        lines = [
            _SIGNATURE,
            _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
            _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
            _OPTION_LEN
            + str(len(self._nodes) - len(self._absent_keys)) + '\n',
            ]
        prefix_length = sum(len(line) for line in lines)
        # References are stored as byte offsets. The offset of a node later
        # in the file depends on the width of every reference before it, so
        # to avoid polynomial work all offsets are zero padded to one common
        # width. (Binary offsets would break the trivial file parsing.)
        # Finding that width takes three passes:
        #  1. total up the non-reference bytes and count the references,
        #  2. pick the width and turn the totals into node addresses,
        #  3. serialise.

        # Nodes are written in forward key order. A topological order may be
        # worth considering one day, at the price of table scans or a second
        # index for direct lookup.
        entries = sorted(self._nodes.items())
        # Without the pre-pass the final length is not known in advance.
        expected_bytes = None
        # The pre-pass is only needed when there are reference lists at all.
        if self.reference_lists:
            # One \x00 sits between consecutive elements of a key.
            separators = 0
            if self._key_length > 1:
                separators = self._key_length - 1
            # (key, non-reference bytes before node, references before node)
            starts = []
            fixed_bytes = prefix_length
            ref_count = 0
            for key, (absent, references, value) in entries:
                # Remember the totals *so far*; together with the reference
                # width they give this node's address in pass two.
                starts.append((key, fixed_bytes, ref_count))
                # Literal key and value, plus 3 nulls and 1 newline.
                fixed_bytes += (sum(len(element) for element in key)
                                + separators + len(value) + 3 + 1)
                if absent:
                    # One byte for the absent marker, and no references.
                    fixed_bytes += 1
                    continue
                # (ref_lists - 1) tabs between the reference lists.
                fixed_bytes += self.reference_lists - 1
                for ref_list in references:
                    ref_count += len(ref_list)
                    if ref_list:
                        # (refs - 1) carriage returns within one list.
                        fixed_bytes += len(ref_list) - 1
            # How many digits does it take to write the largest offset?
            digits = 1
            while 10 ** digits < fixed_bytes + ref_count * digits:
                digits += 1
            # The extra byte is the terminating newline.
            expected_bytes = fixed_bytes + ref_count * digits + 1
            key_addresses = dict(
                (key, fixed + refs * digits) for key, fixed, refs in starts)
            format_string = '%%0%sd' % digits
        for key, (absent, references, value) in entries:
            flattened = '\t'.join(
                ['\r'.join([format_string % key_addresses[reference]
                            for reference in ref_list])
                 for ref_list in references])
            lines.append("%s\x00%s\x00%s\x00%s\n" % (
                '\x00'.join(key), absent, flattened, value))
        lines.append('\n')
        result = StringIO(''.join(lines))
        if expected_bytes:
            actual_bytes = len(result.getvalue())
            if actual_bytes != expected_bytes:
                raise errors.BzrError('Failed index creation. Internal error:'
                    ' mismatched output length and expected length: %d %d' %
                    (actual_bytes, expected_bytes))
        return result

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        A parameter left as None keeps its current setting.

        :param for_size: Tell the builder to try and make the index as small
            as possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        :return: None
        """
        # Nothing in this class reads these flags; they are only recorded
        # here. Other builders do act on them.
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices
        if for_size is not None:
            self._optimize_for_size = for_size

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()

        Relies on self.iter_entries(), which is not defined in the class as
        shown here and is presumably supplied by a subclass.

        :param keys: The keys to start walking from.
        :param ref_list_num: Which reference list of each node holds the
            parents to follow.
        :return: (parent_map, missing_keys)
        """
        parent_map = {}
        missing_keys = set()
        wanted = set(keys)
        while wanted:
            discovered = set()
            for _, key, _value, ref_lists in self.iter_entries(wanted):
                parents = ref_lists[ref_list_num]
                parent_map[key] = parents
                discovered.update(
                    [p for p in parents if p not in parent_map])
            # Done once per round, after every entry found for this round has
            # been recorded, so only keys that produced no entry are reported.
            missing_keys.update(wanted.difference(parent_map))
            wanted = discovered
        return parent_map, missing_keys
| 365 | ||
| 2592.1.5
by Robert Collins Trivial index reading. | 366 | |
| 367 | class GraphIndex(object): | |
| 368 | """An index for data with embedded graphs. | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 369 | |
| 2592.1.10
by Robert Collins Make validate detect node reference parsing errors. | 370 |     The index maps keys to a list of key reference lists, and a value.
 | 
| 371 |     Each node has the same number of key reference lists. Each key reference
 | |
| 372 |     list can be empty or an arbitrary length. The value is an opaque NULL
 | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 373 |     terminated string without any newlines. The storage of the index is
 | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 374 |     hidden in the interface: keys and key references are always tuples of
 | 
| 375 |     bytestrings, never the internal representation (e.g. dictionary offsets).
 | |
| 2592.1.30
by Robert Collins Absent entries are not yeilded. | 376 | |
| 377 |     It is presumed that the index will not be mutated - it is static data.
 | |
| 2592.1.34
by Robert Collins Cleanup docs. | 378 | |
| 2592.1.44
by Robert Collins Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review. | 379 |     Successive iter_all_entries calls will read the entire index each time.
 | 
| 380 |     Additionally, iter_entries calls will read the index linearly until the
 | |
| 381 |     desired keys are found. XXX: This must be fixed before the index is
 | |
| 2592.1.34
by Robert Collins Cleanup docs. | 382 |     suitable for production use. :XXX
 | 
| 2592.1.5
by Robert Collins Trivial index reading. | 383 |     """
 | 
| 384 | ||
| 5074.4.2
by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. | 385 | def __init__(self, transport, name, size, unlimited_cache=False, offset=0): | 
| 2592.1.5
by Robert Collins Trivial index reading. | 386 | """Open an index called name on transport. | 
| 387 | ||
| 388 |         :param transport: A bzrlib.transport.Transport.
 | |
| 389 |         :param name: A path to provide to transport API calls.
 | |
| 2890.2.1
by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the | 390 |         :param size: The size of the index in bytes. This is used for bisection
 | 
| 391 |             logic to perform partial index reads. While the size could be
 | |
| 392 |             obtained by statting the file this introduced an additional round
 | |
| 2890.2.8
by Robert Collins Make the size of the index optionally None for the pack-names index. | 393 |             trip as well as requiring stat'able transports, both of which are
 | 
| 394 |             avoided by having it supplied. If size is None, then bisection
 | |
| 395 |             support will be disabled and accessing the index will just stream
 | |
| 396 |             all the data.
 | |
| 5074.4.2
by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. | 397 |         :param offset: Instead of starting the index data at offset 0, start it
 | 
| 398 |             at an arbitrary offset.
 | |
| 2592.1.5
by Robert Collins Trivial index reading. | 399 |         """
 | 
| 400 | self._transport = transport | |
| 401 | self._name = name | |
| 2890.2.16
by Robert Collins Review feedback. | 402 |         # Becomes a dict of key:(value, reference-list-byte-locations) used by
 | 
| 403 |         # the bisection interface to store parsed but not resolved keys.
 | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 404 | self._bisect_nodes = None | 
| 2890.2.16
by Robert Collins Review feedback. | 405 |         # Becomes a dict of key:(value, reference-list-keys) which are ready to
 | 
| 406 |         # be returned directly to callers.
 | |
| 2624.2.2
by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram. | 407 | self._nodes = None | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 408 |         # a sorted list of slice-addresses for the parsed bytes of the file.
 | 
| 409 |         # e.g. (0,1) would mean that byte 0 is parsed.
 | |
| 2890.2.2
by Robert Collins Opening an index creates a map for the parsed bytes. | 410 | self._parsed_byte_map = [] | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 411 |         # a sorted list of keys matching each slice address for parsed bytes
 | 
| 412 |         # e.g. (None, 'foo@bar') would mean that the first byte contained no
 | |
| 413 |         # key, and the end byte of the slice is the of the data for 'foo@bar'
 | |
| 414 | self._parsed_key_map = [] | |
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 415 | self._key_count = None | 
| 2624.2.2
by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram. | 416 | self._keys_by_offset = None | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 417 | self._nodes_by_key = None | 
| 2890.2.1
by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the | 418 | self._size = size | 
| 3665.3.3
by John Arbash Meinel If we read more than 50% of the whole index, | 419 |         # The number of bytes we've read so far in trying to process this file
 | 
| 420 | self._bytes_read = 0 | |
| 5074.4.2
by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now. | 421 | self._base_offset = offset | 
| 2624.2.2
by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram. | 422 | |
| 2592.3.176
by Robert Collins Various pack refactorings. | 423 | def __eq__(self, other): | 
| 2592.3.215
by Robert Collins Review feedback. | 424 | """Equal when self and other were created with the same parameters.""" | 
| 2592.3.176
by Robert Collins Various pack refactorings. | 425 | return ( | 
| 426 | type(self) == type(other) and | |
| 427 | self._transport == other._transport and | |
| 428 | self._name == other._name and | |
| 429 | self._size == other._size) | |
| 430 | ||
| 431 | def __ne__(self, other): | |
| 432 | return not self.__eq__(other) | |
| 433 | ||
| 3517.4.13
def __repr__(self):
    """Show the class name and the absolute location of the index."""
    location = self._transport.abspath(self._name)
    return "%s(%r)" % (self.__class__.__name__, location)
| 437 | ||
| 3665.3.1
def _buffer_all(self, stream=None):
    """Buffer all the index data.

    Mutates self._nodes and self._keys_by_offset. Does nothing if the
    index has already been buffered.

    :param stream: An optional file-like object holding the index data.
        When None the data is fetched from the transport, and anything
        before self._base_offset is discarded. The stream is always
        closed before this method returns or raises.
    :raises errors.BadIndexData: If the data does not end with exactly
        one empty trailer line.
    """
    if self._nodes is not None:
        # We already did this
        return
    if 'index' in debug.debug_flags:
        mutter('Reading entire index %s', self._transport.abspath(self._name))
    if stream is None:
        stream = self._transport.get(self._name)
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            try:
                data = stream.read()
            finally:
                # The transport stream is replaced below, so release it
                # here rather than leaking it.
                stream.close()
            stream = StringIO(data[self._base_offset:])
    try:
        # Header validation can raise; the finally clause makes sure the
        # stream is closed in that case too.
        self._read_prefix(stream)
        pos = stream.tell()
        lines = stream.read().split('\n')
    finally:
        stream.close()
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # ready-to-return key:value or key:value, node_ref_lists
    self._nodes = {}
    self._nodes_by_key = None
    # The data ends with a newline, so the final split element is empty.
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in self._keys_by_offset.itervalues():
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)
| 481 | ||
| 4744.2.6
by John Arbash Meinel Start exposing an GraphIndex.clear_cache() member. | 482 | def clear_cache(self): | 
| 483 | """Clear out any cached/memoized values. | |
| 484 | ||
| 485 |         This can be called at any time, but generally it is used when we have
 | |
| 486 |         extracted some information, but don't expect to be requesting any more
 | |
| 487 |         from this index.
 | |
| 488 |         """
 | |
| 489 | ||
| 4011.5.11
by Robert Collins Polish the KnitVersionedFiles.scan_unvalidated_index api. | 490 | def external_references(self, ref_list_num): | 
| 4011.5.2
by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references() | 491 | """Return references that are not present in this index. | 
| 492 |         """
 | |
| 493 | self._buffer_all() | |
| 4011.5.3
by Andrew Bennetts Implement and test external_references on GraphIndex and BTreeGraphIndex. | 494 | if ref_list_num + 1 > self.node_ref_lists: | 
| 495 | raise ValueError('No ref list %d, index has %d ref lists' | |
| 496 | % (ref_list_num, self.node_ref_lists)) | |
| 4011.5.2
by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references() | 497 | refs = set() | 
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 498 | nodes = self._nodes | 
| 499 | for key, (value, ref_lists) in nodes.iteritems(): | |
| 4011.5.2
by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references() | 500 | ref_list = ref_lists[ref_list_num] | 
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 501 | refs.update([ref for ref in ref_list if ref not in nodes]) | 
| 502 | return refs | |
| 4011.5.2
by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references() | 503 | |
| 3711.3.21
by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand. | 504 | def _get_nodes_by_key(self): | 
| 505 | if self._nodes_by_key is None: | |
| 506 | nodes_by_key = {} | |
| 507 | if self.node_ref_lists: | |
| 508 | for key, (value, references) in self._nodes.iteritems(): | |
| 509 | key_dict = nodes_by_key | |
| 510 | for subkey in key[:-1]: | |
| 511 | key_dict = key_dict.setdefault(subkey, {}) | |
| 512 | key_dict[key[-1]] = key, value, references | |
| 513 | else: | |
| 514 | for key, value in self._nodes.iteritems(): | |
| 515 | key_dict = nodes_by_key | |
| 516 | for subkey in key[:-1]: | |
| 517 | key_dict = key_dict.setdefault(subkey, {}) | |
| 518 | key_dict[key[-1]] = key, value | |
| 519 | self._nodes_by_key = nodes_by_key | |
| 520 | return self._nodes_by_key | |
| 521 | ||
| 2624.2.2
def iter_all_entries(self):
    """Iterate over all keys within the index.

    The whole index is buffered into memory if it has not been already.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists). The former tuple is used
        when there are no reference lists in the index, making the API
        compatible with simple key:value index types. There is no defined
        order for the result iteration - it will be in the most efficient
        order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    nodes = self._nodes
    if not self.node_ref_lists:
        for key, value in nodes.iteritems():
            yield self, key, value
        return
    for key, (value, node_ref_lists) in nodes.iteritems():
        yield self, key, value, node_ref_lists
| 2624.2.2
by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram. | 542 | |
| 2592.1.27
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Consumes the signature and then three option lines, storing their
    integer values in self.node_ref_lists, self._key_length and
    self._key_count in that order.

    :raises errors.BadIndexFormatSignature: If the signature does not
        match.
    :raises errors.BadIndexOptions: If an option line has the wrong
        prefix or a non-integer value.
    """
    expected_signature = self._signature()
    if stream.read(len(expected_signature)) != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    header_options = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for prefix, attribute in header_options:
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            # Drop the option prefix and the trailing newline.
            number = int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, number)
| 2592.1.5
by Robert Collins Trivial index reading. | 568 | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 569 | def _resolve_references(self, references): | 
| 2890.2.16
by Robert Collins Review feedback. | 570 | """Return the resolved key references for references. | 
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 571 | |
| 2890.2.16
by Robert Collins Review feedback. | 572 |         References are resolved by looking up the location of the key in the
 | 
| 573 |         _keys_by_offset map and substituting the key name, preserving ordering.
 | |
| 574 | ||
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 575 |         :param references: An iterable of iterables of key locations. e.g.
 | 
| 2890.2.16
by Robert Collins Review feedback. | 576 |             [[123, 456], [123]]
 | 
| 577 |         :return: A tuple of tuples of keys.
 | |
| 578 |         """
 | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 579 | node_refs = [] | 
| 580 | for ref_list in references: | |
| 581 | node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list])) | |
| 582 | return tuple(node_refs) | |
| 583 | ||
| 2890.2.11
by Robert Collins Bisection improvements after integrating with packs. | 584 | def _find_index(self, range_map, key): | 
| 585 | """Helper for the _parsed_*_index calls. | |
| 586 | ||
| 587 |         Given a range map - [(start, end), ...], finds the index of the range
 | |
| 588 |         in the map for key if it is in the map, and if it is not there, the
 | |
| 589 |         immediately preceeding range in the map.
 | |
| 590 |         """
 | |
| 591 | result = bisect_right(range_map, key) - 1 | |
| 592 | if result + 1 < len(range_map): | |
| 593 |             # check the border condition, it may be in result + 1
 | |
| 594 | if range_map[result + 1][0] == key[0]: | |
| 595 | return result + 1 | |
| 596 | return result | |
| 597 | ||
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 598 | def _parsed_byte_index(self, offset): | 
| 599 | """Return the index of the entry immediately before offset. | |
| 600 | ||
| 601 |         e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
 | |
| 602 |         there is one unparsed byte (the 11th, addressed as[10]). then:
 | |
| 603 |         asking for 0 will return 0
 | |
| 604 |         asking for 10 will return 0
 | |
| 605 |         asking for 11 will return 1
 | |
| 606 |         asking for 12 will return 1
 | |
| 607 |         """
 | |
| 608 | key = (offset, 0) | |
| 2890.2.11
by Robert Collins Bisection improvements after integrating with packs. | 609 | return self._find_index(self._parsed_byte_map, key) | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 610 | |
| 611 | def _parsed_key_index(self, key): | |
| 612 | """Return the index of the entry immediately before key. | |
| 613 | ||
| 614 |         e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
 | |
| 615 |         meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
 | |
| 616 |         have been parsed, then:
 | |
| 617 |         asking for '' will return 0
 | |
| 618 |         asking for 'a' will return 0
 | |
| 619 |         asking for 'b' will return 1
 | |
| 620 |         asking for 'e' will return 1
 | |
| 621 |         """
 | |
| 2890.2.11
by Robert Collins Bisection improvements after integrating with packs. | 622 | search_key = (key, None) | 
| 623 | return self._find_index(self._parsed_key_map, search_key) | |
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 624 | |
| 625 | def _is_parsed(self, offset): | |
| 626 | """Returns True if offset has been parsed.""" | |
| 627 | index = self._parsed_byte_index(offset) | |
| 628 | if index == len(self._parsed_byte_map): | |
| 629 | return offset < self._parsed_byte_map[index - 1][1] | |
| 630 | start, end = self._parsed_byte_map[index] | |
| 631 | return offset >= start and offset < end | |
| 632 | ||
| 2890.2.7
by Robert Collins * Pack indices are now partially parsed for specific key lookup using a | 633 | def _iter_entries_from_total_buffer(self, keys): | 
| 634 | """Iterate over keys when the entire index is parsed.""" | |
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 635 |         # Note: See the note in BTreeBuilder.iter_entries for why we don't use
 | 
| 636 |         #       .intersection() here
 | |
| 637 | nodes = self._nodes | |
| 638 | keys = [key for key in keys if key in nodes] | |
| 2624.2.3
by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans. | 639 | if self.node_ref_lists: | 
| 640 | for key in keys: | |
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 641 | value, node_refs = nodes[key] | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 642 | yield self, key, value, node_refs | 
| 2624.2.3
by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans. | 643 | else: | 
| 644 | for key in keys: | |
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 645 | yield self, key, nodes[key] | 
| 2592.1.7
by Robert Collins A validate that goes boom. | 646 | |
| 2890.2.7
by Robert Collins * Pack indices are now partially parsed for specific key lookup using a | 647 | def iter_entries(self, keys): | 
| 648 | """Iterate over keys within the index. | |
| 649 | ||
| 650 |         :param keys: An iterable providing the keys to be retrieved.
 | |
| 651 |         :return: An iterable as per iter_all_entries, but restricted to the
 | |
| 652 |             keys supplied. No additional keys will be returned, and every
 | |
| 653 |             key supplied that is in the index will be returned.
 | |
| 654 |         """
 | |
| 655 | keys = set(keys) | |
| 656 | if not keys: | |
| 657 | return [] | |
| 2890.2.8
by Robert Collins Make the size of the index optionally None for the pack-names index. | 658 | if self._size is None and self._nodes is None: | 
| 659 | self._buffer_all() | |
| 3665.3.3
by John Arbash Meinel If we read more than 50% of the whole index, | 660 | |
| 3606.6.1
by Robert Collins Cherry-pick Robert's index buffering. | 661 |         # We fit about 20 keys per minimum-read (4K), so if we are looking for
 | 
| 662 |         # more than 1/20th of the index its likely (assuming homogenous key
 | |
| 663 |         # spread) that we'll read the entire index. If we're going to do that,
 | |
| 664 |         # buffer the whole thing. A better analysis might take key spread into
 | |
| 665 |         # account - but B+Tree indices are better anyway.
 | |
| 666 |         # We could look at all data read, and use a threshold there, which will
 | |
| 667 |         # trigger on ancestry walks, but that is not yet fully mapped out.
 | |
| 668 | if self._nodes is None and len(keys) * 20 > self.key_count(): | |
| 669 | self._buffer_all() | |
| 2890.2.7
by Robert Collins * Pack indices are now partially parsed for specific key lookup using a | 670 | if self._nodes is not None: | 
| 671 | return self._iter_entries_from_total_buffer(keys) | |
| 672 | else: | |
| 673 | return (result[1] for result in bisect_multi_bytes( | |
| 2890.2.18
by Robert Collins Review feedback. | 674 | self._lookup_keys_via_location, self._size, keys)) | 
| 2890.2.7
by Robert Collins * Pack indices are now partially parsed for specific key lookup using a | 675 | |
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 676 | def iter_entries_prefix(self, keys): | 
| 677 | """Iterate over keys within the index using prefix matching. | |
| 678 | ||
| 679 |         Prefix matching is applied within the tuple of a key, not to within
 | |
| 680 |         the bytestring of each key element. e.g. if you have the keys ('foo',
 | |
| 681 |         'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | |
| 682 |         only the former key is returned.
 | |
| 683 | ||
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 684 |         WARNING: Note that this method currently causes a full index parse
 | 
| 685 |         unconditionally (which is reasonably appropriate as it is a means for
 | |
| 686 |         thunking many small indices into one larger one and still supplies
 | |
| 687 |         iter_all_entries at the thunk layer).
 | |
| 688 | ||
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 689 |         :param keys: An iterable providing the key prefixes to be retrieved.
 | 
| 690 |             Each key prefix takes the form of a tuple the length of a key, but
 | |
| 691 |             with the last N elements 'None' rather than a regular bytestring.
 | |
| 692 |             The first element cannot be 'None'.
 | |
| 693 |         :return: An iterable as per iter_all_entries, but restricted to the
 | |
| 694 |             keys with a matching prefix to those supplied. No additional keys
 | |
| 695 |             will be returned, and every match that is in the index will be
 | |
| 696 |             returned.
 | |
| 697 |         """
 | |
| 698 | keys = set(keys) | |
| 699 | if not keys: | |
| 700 |             return
 | |
| 701 |         # load data - also finds key lengths
 | |
| 702 | if self._nodes is None: | |
| 703 | self._buffer_all() | |
| 704 | if self._key_length == 1: | |
| 705 | for key in keys: | |
| 706 |                 # sanity check
 | |
| 707 | if key[0] is None: | |
| 708 | raise errors.BadIndexKey(key) | |
| 709 | if len(key) != self._key_length: | |
| 710 | raise errors.BadIndexKey(key) | |
| 711 | if self.node_ref_lists: | |
| 712 | value, node_refs = self._nodes[key] | |
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 713 | yield self, key, value, node_refs | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 714 | else: | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 715 | yield self, key, self._nodes[key] | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 716 |             return
 | 
| 3711.3.21
by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand. | 717 | nodes_by_key = self._get_nodes_by_key() | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 718 | for key in keys: | 
| 719 |             # sanity check
 | |
| 720 | if key[0] is None: | |
| 721 | raise errors.BadIndexKey(key) | |
| 722 | if len(key) != self._key_length: | |
| 723 | raise errors.BadIndexKey(key) | |
| 724 |             # find what it refers to:
 | |
| 3711.3.21
by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand. | 725 | key_dict = nodes_by_key | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 726 | elements = list(key) | 
| 2624.2.11
by Robert Collins Review comments. | 727 |             # find the subdict whose contents should be returned.
 | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 728 | try: | 
| 729 | while len(elements) and elements[0] is not None: | |
| 730 | key_dict = key_dict[elements[0]] | |
| 731 | elements.pop(0) | |
| 732 | except KeyError: | |
| 733 |                 # a non-existant lookup.
 | |
| 734 |                 continue
 | |
| 735 | if len(elements): | |
| 736 | dicts = [key_dict] | |
| 737 | while dicts: | |
| 738 | key_dict = dicts.pop(-1) | |
| 739 |                     # can't be empty or would not exist
 | |
| 740 | item, value = key_dict.iteritems().next() | |
| 741 | if type(value) == dict: | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 742 |                         # push keys
 | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 743 | dicts.extend(key_dict.itervalues()) | 
| 744 | else: | |
| 745 |                         # yield keys
 | |
| 746 | for value in key_dict.itervalues(): | |
| 2624.2.11
by Robert Collins Review comments. | 747 |                             # each value is the key:value:node refs tuple
 | 
| 748 |                             # ready to yield.
 | |
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 749 | yield (self, ) + value | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 750 | else: | 
| 2624.2.11
by Robert Collins Review comments. | 751 |                 # the last thing looked up was a terminal element
 | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 752 | yield (self, ) + key_dict | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 753 | |
| 4593.4.12
by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry() | 754 | def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys): | 
| 755 | """See BTreeIndex._find_ancestors.""" | |
| 4593.4.7
by John Arbash Meinel Basic implementation of a conforming interface for GraphIndex. | 756 |         # The api can be implemented as a trivial overlay on top of
 | 
| 757 |         # iter_entries, it is not an efficient implementation, but it at least
 | |
| 758 |         # gets the job done.
 | |
| 759 | found_keys = set() | |
| 760 | search_keys = set() | |
| 761 | for index, key, value, refs in self.iter_entries(keys): | |
| 762 | parent_keys = refs[ref_list_num] | |
| 763 | found_keys.add(key) | |
| 764 | parent_map[key] = parent_keys | |
| 765 | search_keys.update(parent_keys) | |
| 766 |         # Figure out what, if anything, was missing
 | |
| 767 | missing_keys.update(set(keys).difference(found_keys)) | |
| 768 | search_keys = search_keys.difference(parent_map) | |
| 769 | return search_keys | |
| 770 | ||
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 771 | def key_count(self): | 
| 772 | """Return an estimate of the number of keys in this index. | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 773 | |
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 774 |         For GraphIndex the estimate is exact.
 | 
| 775 |         """
 | |
| 776 | if self._key_count is None: | |
| 2979.1.1
by Robert Collins Use the GraphIndex header to answer key_count queries rather than parsing the entire index unnecessarily. | 777 | self._read_and_parse([_HEADER_READV]) | 
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 778 | return self._key_count | 
| 779 | ||
def _lookup_keys_via_location(self, location_keys):
    """Answer bisection probes for keys at candidate byte locations.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of (location, key) tuples, where location
        is a byte offset into the index.
    :return: A list of ((location, key), result) tuples as expected by
        bzrlib.bisect_multi.bisect_multi_bytes.  result is False when the
        key is known to be absent, -1 or +1 when the key sorts below or
        not below the parsed range found at the probed location, or an
        index node tuple beginning (self, key, value) when the key was
        found.
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    #
    # Phase 1: decide which probes need bytes read.  Each read asks for
    # 800 bytes - much more than rows we've seen anywhere - clamped to the
    # end of the index.
    probe_length = 800
    to_read = []
    for location, key in location_keys:
        if self._bisect_nodes and key in self._bisect_nodes:
            # The key itself has already been parsed.
            continue
        slot = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[slot][0] <= key and
            (self._parsed_key_map[slot][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[slot][1] == self._size)):
            # The key range has been parsed, so no read is needed even
            # if the key is not present.
            continue
        slot = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[slot][0] <= location and
            self._parsed_byte_map[slot][1] > location):
            # The byte region has been parsed, so no read is needed.
            continue
        # todo, trim out parsed locations.
        length = min(probe_length, self._size - location)
        if length > 0:
            to_read.append((location, length))
    # The header must be parsed before anything else can be interpreted.
    if self._bisect_nodes is None:
        to_read.append(_HEADER_READV)
    self._read_and_parse(to_read)

    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range; answer everything from the full node dict.
        for location, key in location_keys:
            probe = (location, key)
            if key not in self._nodes:
                answer = False
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                answer = (self, key, value, refs)
            else:
                answer = (self, key, self._nodes[key])
            result.append((probe, answer))
        return result

    # Phase 2: answer what we can from the bisection caches.  Keys whose
    # references point at rows we have not parsed yet are deferred.
    deferred = []
    reference_locations = set()
    for location, key in location_keys:
        probe = (location, key)
        if key in self._bisect_nodes:
            if not self.node_ref_lists:
                result.append((probe, (self, key, self._bisect_nodes[key])))
                continue
            # the references may not have been all parsed.
            value, refs = self._bisect_nodes[key]
            unparsed = [ref for ref_list in refs for ref in ref_list
                        if ref not in self._keys_by_offset]
            if unparsed:
                reference_locations.update(unparsed)
                deferred.append(probe)
            else:
                result.append((probe, (self, key, value,
                                       self._resolve_references(refs))))
            continue
        # has the region the key should be in, been parsed?
        slot = self._parsed_key_index(key)
        if (self._parsed_key_map[slot][0] <= key and
            (self._parsed_key_map[slot][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[slot][1] == self._size)):
            result.append((probe, False))
            continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        slot = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[slot][0]:
            direction = -1
        else:
            direction = +1
        result.append((probe, direction))

    # Phase 3: read the rows needed to resolve the deferred references.
    to_read = []
    for location in reference_locations:
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        length = min(probe_length, self._size - location)
        if length > 0:
            to_read.append((location, length))
    self._read_and_parse(to_read)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in deferred:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in deferred:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result
| 921 | ||
def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    On success this sets self.node_ref_lists, self._key_length and
    self._key_count from the three option lines, marks the header bytes
    as parsed, and initialises the bisection parsing state
    (self._expected_elements, self._keys_by_offset, self._bisect_nodes).

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data (which may have length 0).
    :raises BadIndexFormatSignature: if the data does not begin with this
        index type's signature.
    :raises BadIndexOptions: if an option line is missing, carries the
        wrong prefix, or has a non-integer value.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    lines = bytes[len(expected_signature):].splitlines()
    if len(lines) < 3:
        # A truncated header is a malformed index; report it as such
        # instead of leaking an IndexError from the lookups below.
        raise errors.BadIndexOptions(self)

    def int_option(line, prefix):
        # Return the integer following 'prefix' on an option line.
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            return int(line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(lines[0], _OPTION_NODE_REFS)
    self._key_length = int_option(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(lines[2], _OPTION_LEN)
    # calculate the bytes we have processed: the signature, the three
    # option lines, and one line terminator for each of them.
    header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                  len(lines[2]) + 3)
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state: each node line holds the key elements plus
    # three further fields.
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]
| 965 | ||
def _parse_region(self, offset, data):
    """Parse node data returned from a readv operation.

    The data may span several unparsed gaps between already parsed
    ranges, so segments are parsed one at a time until the data is used
    up or is found to lie inside a range that is already parsed.

    :param offset: The byte offset the data starts at.
    :param data: The data to parse.
    """
    end = offset + len(data)
    high_parsed = offset
    last_segment = False
    while not last_segment:
        index = self._parsed_byte_index(high_parsed)
        # Trivial test - if the data ends inside the parsed range found
        # for the current position, there is nothing left to parse.
        if end < self._parsed_byte_map[index][1]:
            break
        high_parsed, last_segment = self._parse_segment(
            offset, data, end, index)
| 988 | ||
def _parse_segment(self, offset, data, end, index):
    """Parse one segment of data.

    :param offset: Where 'data' begins in the file.
    :param data: Some data to parse a segment of.
    :param end: Where data ends
    :param index: The current index into the parsed bytes map.
    :return: high_parsed_byte, last_segment.
        high_parsed_byte is the location of the highest parsed byte in this
        segment, last_segment is True if the parsed segment is the last
        possible one in the data block.
    """
    # --- start of the segment, relative to the lower parsed region ---
    lower_end = self._parsed_byte_map[index][1]
    if offset < lower_end:
        # overlaps the lower parsed region: skip the parsed data
        trim_start = lower_end - offset
    else:
        # abuts, or lies clear of, the lower parsed region: use all data
        trim_start = None
    # Only data that lies clear of the lower region needs its leading
    # partial line (up to the first \n) discarded.
    start_adjacent = offset <= lower_end

    # --- end of the segment, relative to the next parsed region ---
    # default is to use all data, and to be the final segment
    trim_end = None
    last_segment = True
    if end == self._size:
        # lines up to the end of all data: do not strip to the last \n
        end_adjacent = True
    elif index + 1 == len(self._parsed_byte_map):
        # at the end of the parsed data: strip to the last \n
        end_adjacent = False
    else:
        next_start, next_end = self._parsed_byte_map[index + 1]
        if end == next_start:
            # buts up against the next parsed region
            end_adjacent = True
        elif end > next_start:
            # overlaps into the next parsed region: only consider the
            # unparsed data, which we know ends on an entire record
            trim_end = next_start - offset
            end_adjacent = True
            last_segment = end < next_end
        else:
            # does not overlap into the next region: strip to the last \n
            end_adjacent = False

    # --- now find bytes to discard if needed ---
    if not start_adjacent:
        # trim_start is always None on this path.
        trim_start = data.find('\n') + 1
        if trim_start == 0:
            raise AssertionError('no \n was present')
    if not end_adjacent:
        # trim_end is always None on this path.
        trim_end = data.rfind('\n') + 1
        if trim_end == 0:
            raise AssertionError('no \n was present')

    # adjust offset and data to the parseable data.
    trimmed_data = data[trim_start:trim_end]
    if not trimmed_data:
        raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
            % (trim_start, trim_end, offset, offset + len(data)))
    if trim_start:
        offset += trim_start
    segment_end = offset + len(trimmed_data)
    # splitlines mangles the \r delimiters.. don't use it.  The piece after
    # the final \n is not a complete line, so it is dropped.
    lines = trimmed_data.split('\n')[:-1]
    first_key, last_key, nodes, _ = self._parse_lines(lines, offset)
    self._bisect_nodes.update(nodes)
    self._parsed_bytes(offset, first_key, segment_end, last_key)
    return segment_end, last_segment
| 1096 | ||
| 1097 | def _parse_lines(self, lines, pos): | |
| 1098 | key = None | |
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1099 | first_key = None | 
| 2890.2.17
by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing. | 1100 | trailers = 0 | 
| 1101 | nodes = [] | |
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1102 | for line in lines: | 
| 1103 | if line == '': | |
| 1104 |                 # must be at the end
 | |
| 2890.2.17
by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing. | 1105 | if self._size: | 
| 3376.2.4
by Martin Pool Remove every assert statement from bzrlib! | 1106 | if not (self._size == pos + 1): | 
| 1107 | raise AssertionError("%s %s" % (self._size, pos)) | |
| 2890.2.17
by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing. | 1108 | trailers += 1 | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1109 |                 continue
 | 
| 1110 | elements = line.split('\0') | |
| 1111 | if len(elements) != self._expected_elements: | |
| 1112 | raise errors.BadIndexData(self) | |
| 3530.3.3
by Robert Collins Credit and explanation for interning. | 1113 |             # keys are tuples. Each element is a string that may occur many
 | 
| 1114 |             # times, so we intern them to save space. AB, RC, 200807
 | |
| 3711.3.13
by John Arbash Meinel Shave off another 5s by not building 'node_by_key' | 1115 | key = tuple([intern(element) for element in elements[:self._key_length]]) | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1116 | if first_key is None: | 
| 1117 | first_key = key | |
| 1118 | absent, references, value = elements[-3:] | |
| 1119 | ref_lists = [] | |
| 1120 | for ref_string in references.split('\t'): | |
| 1121 | ref_lists.append(tuple([ | |
| 1122 | int(ref) for ref in ref_string.split('\r') if ref | |
| 1123 |                     ]))
 | |
| 1124 | ref_lists = tuple(ref_lists) | |
| 1125 | self._keys_by_offset[pos] = (key, absent, ref_lists, value) | |
| 1126 | pos += len(line) + 1 # +1 for the \n | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 1127 | if absent: | 
| 1128 |                 continue
 | |
| 1129 | if self.node_ref_lists: | |
| 1130 | node_value = (value, ref_lists) | |
| 1131 | else: | |
| 1132 | node_value = value | |
| 2890.2.17
by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing. | 1133 | nodes.append((key, node_value)) | 
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 1134 |             # print "parsed ", key
 | 
| 2890.2.17
by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing. | 1135 | return first_key, key, nodes, trailers | 
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1136 | |
def _parsed_bytes(self, start, start_key, end, end_key):
    """Mark the bytes from start to end as parsed.

    The byte range is recorded in self._parsed_byte_map and the matching
    key range in self._parsed_key_map; a range that touches an existing
    neighbour is merged into it rather than stored separately.

    :param start: The start of the parsed region.
    :param start_key: The first key in the parsed region.
    :param end: The end of the parsed region (exclusive).
    :param end_key: The last key in the parsed region.
    """
    index = self._parsed_byte_index(start)
    byte_map = self._parsed_byte_map
    key_map = self._parsed_key_map
    if index == -1:
        # first range parsed is always the beginning.
        byte_map.insert(index, (start, end))
        key_map.insert(index, (start_key, end_key))
        return
    # Does the new range touch the range below it / the range above it?
    joins_lower = byte_map[index][1] == start
    joins_upper = (index + 1 < len(byte_map) and
                   byte_map[index + 1][0] == end)
    if joins_lower and joins_upper:
        # combine two regions: the gap between them is now filled
        byte_map[index] = (byte_map[index][0], byte_map[index + 1][1])
        key_map[index] = (key_map[index][0], key_map[index + 1][1])
        del byte_map[index + 1]
        del key_map[index + 1]
    elif joins_lower:
        # extend the lower entry
        byte_map[index] = (byte_map[index][0], end)
        key_map[index] = (key_map[index][0], end_key)
    elif joins_upper:
        # extend the higher entry
        byte_map[index + 1] = (start, byte_map[index + 1][1])
        key_map[index + 1] = (start_key, key_map[index + 1][1])
    else:
        # new entry
        byte_map.insert(index + 1, (start, end))
        key_map.insert(index + 1, (start_key, end_key))
| 2890.2.5
by Robert Collins Create a content lookup function for bisection in GraphIndex. | 1186 | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 1187 | def _read_and_parse(self, readv_ranges): | 
| 4775.1.1
by Martin Pool Remove several 'the the' typos | 1188 | """Read the ranges and parse the resulting data. | 
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 1189 | |
| 1190 |         :param readv_ranges: A prepared readv range list.
 | |
| 1191 |         """
 | |
| 3665.3.5
by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index. | 1192 | if not readv_ranges: | 
| 1193 |             return
 | |
| 1194 | if self._nodes is None and self._bytes_read * 2 >= self._size: | |
| 1195 |             # We've already read more than 50% of the file and we are about to
 | |
| 1196 |             # request more data, just _buffer_all() and be done
 | |
| 1197 | self._buffer_all() | |
| 1198 |             return
 | |
| 1199 | ||
| 5074.4.3
by John Arbash Meinel Actually implement offset support for GraphIndex. | 1200 | base_offset = self._base_offset | 
| 1201 | if base_offset != 0: | |
| 1202 |             # Rewrite the ranges for the offset
 | |
| 1203 | readv_ranges = [(start+base_offset, size) | |
| 1204 | for start, size in readv_ranges] | |
| 3665.3.5
by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index. | 1205 | readv_data = self._transport.readv(self._name, readv_ranges, True, | 
| 5074.4.3
by John Arbash Meinel Actually implement offset support for GraphIndex. | 1206 | self._size + self._base_offset) | 
| 3665.3.5
by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index. | 1207 |         # parse
 | 
| 1208 | for offset, data in readv_data: | |
| 5074.4.3
by John Arbash Meinel Actually implement offset support for GraphIndex. | 1209 | offset -= base_offset | 
| 3665.3.5
by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index. | 1210 | self._bytes_read += len(data) | 
| 5074.4.3
by John Arbash Meinel Actually implement offset support for GraphIndex. | 1211 | if offset < 0: | 
| 1212 |                 # transport.readv() expanded to extra data which isn't part of
 | |
| 1213 |                 # this index
 | |
| 1214 | data = data[-offset:] | |
| 1215 | offset = 0 | |
| 3665.3.5
by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index. | 1216 | if offset == 0 and len(data) == self._size: | 
| 1217 |                 # We read the whole range, most likely because the
 | |
| 1218 |                 # Transport upcast our readv ranges into one long request
 | |
| 1219 |                 # for enough total data to grab the whole index.
 | |
| 1220 | self._buffer_all(StringIO(data)) | |
| 1221 |                 return
 | |
| 1222 | if self._bisect_nodes is None: | |
| 1223 |                 # this must be the start
 | |
| 1224 | if not (offset == 0): | |
| 1225 | raise AssertionError() | |
| 1226 | offset, data = self._parse_header_from_bytes(data) | |
| 1227 |             # print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
 | |
| 1228 | self._parse_region(offset, data) | |
| 2890.2.6
by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface. | 1229 | |
def _signature(self):
    """Return the file signature used by this index type."""
    signature = _SIGNATURE
    return signature
| 1233 | ||
def validate(self):
    """Validate that everything in the index can be accessed.

    Iterating over every entry currently validates the index completely, so
    the entries are simply read one by one and discarded.
    """
    all_entries = self.iter_all_entries()
    for _entry in all_entries:
        pass
| 2592.1.31
by Robert Collins Build a combined graph index to use multiple indices at once. | 1239 | |
| 1240 | ||
class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries are made against the first index, then the second, and so on,
    so the order of the indices can influence performance significantly.
    For example, if one index is on local disk and a second on a remote
    server, the local disk index should come before the other in the list.

    Queries also tend to need results from the same indices as previous
    queries, so after every query the indices are reordered to put the ones
    that produced results first (while otherwise preserving the relative
    ordering).
    """

    def __init__(self, indices, reload_func=None):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data.
        :param reload_func: A function to call if we find we are missing an
            index. Should have the form reload_func() => True/False to
            indicate if reloading actually changed anything.
        """
        self._indices = indices
        self._reload_func = reload_func
        # Other CombinedGraphIndex objects that get _move_to_front_by_name
        # called on them whenever this object reorders itself.
        self._sibling_indices = []
        # Names parallel to self._indices: _index_names[n] is the name of
        # _indices[n].  Siblings must all use the same set of names.  The
        # initial indices start out unnamed.
        self._index_names = [None] * len(indices)
| 2592.1.37
by Robert Collins Add CombinedGraphIndex.insert_index. | 1276 | |
| 2592.5.4
by Martin Pool Add CombinedGraphIndex repr | 1277 | def __repr__(self): | 
| 1278 | return "%s(%s)" % ( | |
| 1279 | self.__class__.__name__, | |
| 1280 | ', '.join(map(repr, self._indices))) | |
| 1281 | ||
def clear_cache(self):
    """See GraphIndex.clear_cache().

    The request is forwarded to every backing index in turn.
    """
    for backing_index in self._indices:
        backing_index.clear_cache()
| 1286 | ||
def get_parent_map(self, keys):
    """See graph.StackedParentsProvider.get_parent_map"""
    wanted = set(keys)
    parent_map = {}
    if NULL_REVISION in wanted:
        # The null revision is never looked up in the indices; it is
        # reported directly as having no parents.
        wanted.discard(NULL_REVISION)
        parent_map[NULL_REVISION] = []
    for _index, key, _value, ref_lists in self.iter_entries(wanted):
        # The first reference list holds the parents.  An entry without
        # any parents is reported as a child of the null revision.
        parent_map[key] = ref_lists[0] or (NULL_REVISION,)
    return parent_map
| 2979.2.2
by Robert Collins Per-file graph heads detection during commit for pack repositories. | 1301 | |
| 3830.3.12
by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks | 1302 | has_key = _has_key_from_parent_map | 
| 3830.3.9
by Martin Pool Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests | 1303 | |
def insert_index(self, pos, index, name=None):
    """Insert a new index in the list of indices to query.

    :param pos: The position to insert the index.
    :param index: The index to insert.
    :param name: a name for this index, e.g. a pack name.  These names can
        be used to reflect index reorderings to related CombinedGraphIndex
        instances that use the same names.  (see set_sibling_indices)
    """
    # The two lists are parallel, so the name goes in at the same position
    # as the index it describes.
    backing_indices = self._indices
    backing_indices.insert(pos, index)
    index_names = self._index_names
    index_names.insert(pos, name)
| 2592.1.37
by Robert Collins Add CombinedGraphIndex.insert_index. | 1315 | |
def iter_all_entries(self):
    """Iterate over all keys within the index

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    :return: An iterable of (index, key, value, reference_lists).
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    reported_keys = set()
    while True:
        try:
            for backing_index in self._indices:
                for entry in backing_index.iter_all_entries():
                    entry_key = entry[1]
                    if entry_key in reported_keys:
                        continue
                    yield entry
                    reported_keys.add(entry_key)
        except errors.NoSuchFile:
            # A backing file vanished: reload (or re-raise) and start over.
            # Keys already reported are remembered, so they are not
            # yielded a second time.
            self._reload_or_raise()
        else:
            return
| 2592.1.31
by Robert Collins Build a combined graph index to use multiple indices at once. | 1337 | |
def iter_entries(self, keys):
    """Iterate over keys within the index.

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists). There is
        no defined order for the result iteration - it will be in the most
        efficient order for the index.
    """
    remaining = set(keys)
    hit_indices = []
    while True:
        try:
            for backing_index in self._indices:
                if not remaining:
                    # Every requested key has been found already.
                    break
                produced_result = False
                for entry in backing_index.iter_entries(remaining):
                    # Found keys are not asked for in later indices.
                    remaining.remove(entry[1])
                    yield entry
                    produced_result = True
                if produced_result:
                    hit_indices.append(backing_index)
        except errors.NoSuchFile:
            # A backing file vanished: reload (or re-raise) and retry with
            # the keys that are still outstanding.
            self._reload_or_raise()
        else:
            break
    # Promote the indices that answered this query.
    self._move_to_front(hit_indices)
| 2592.1.31
by Robert Collins Build a combined graph index to use multiple indices at once. | 1367 | |
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = set(keys)
    if not prefixes:
        return
    reported_keys = set()
    hit_indices = []
    while True:
        try:
            for backing_index in self._indices:
                produced_result = False
                for entry in backing_index.iter_entries_prefix(prefixes):
                    if entry[1] not in reported_keys:
                        reported_keys.add(entry[1])
                        yield entry
                        produced_result = True
                if produced_result:
                    hit_indices.append(backing_index)
        except errors.NoSuchFile:
            # A backing file vanished: reload (or re-raise) and start over.
            self._reload_or_raise()
        else:
            break
    # Promote the indices that answered this query.
    self._move_to_front(hit_indices)
| 1409 | ||
| 5086.7.3
by Andrew Bennetts Improve docstrings and refactor slightly for clarity. | 1410 | def _move_to_front(self, hit_indices): | 
| 5086.7.1
by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. | 1411 | """Rearrange self._indices so that hit_indices are first. | 
| 1412 | ||
| 1413 |         Order is maintained as much as possible, e.g. the first unhit index
 | |
| 1414 |         will be the first index in _indices after the hit_indices, and the
 | |
| 1415 |         hit_indices will be present in exactly the order they are passed to
 | |
| 1416 |         _move_to_front.
 | |
| 5086.7.3
by Andrew Bennetts Improve docstrings and refactor slightly for clarity. | 1417 | |
| 1418 |         _move_to_front propagates to all objects in self._sibling_indices by
 | |
| 1419 |         calling _move_to_front_by_name.
 | |
| 5086.7.1
by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP. | 1420 |         """
 | 
| 5151.2.1
by John Arbash Meinel Avoid reordering when unnecessary. Fixes bug #562429 | 1421 | if self._indices[:len(hit_indices)] == hit_indices: | 
| 1422 |             # The 'hit_indices' are already at the front (and in the same
 | |
| 1423 |             # order), no need to re-order
 | |
| 1424 |             return
 | |
| 5086.7.3
by Andrew Bennetts Improve docstrings and refactor slightly for clarity. | 1425 | hit_names = self._move_to_front_by_index(hit_indices) | 
| 1426 | for sibling_idx in self._sibling_indices: | |
| 1427 | sibling_idx._move_to_front_by_name(hit_names) | |
| 1428 | ||
def _move_to_front_by_index(self, hit_indices):
    """Core logic for _move_to_front.

    The indices found in hit_indices are moved ahead of all the others.
    Within each group the indices keep the relative order they currently
    have in self._indices, and self._index_names is reordered in step.

    Returns a list of names corresponding to the hit_indices param.
    """
    indices_info = zip(self._index_names, self._indices)
    if 'index' in debug.debug_flags:
        mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
               indices_info, hit_indices)
    hit_names = []
    new_hit_indices = []
    unhit_names = []
    unhit_indices = []
    wanted = len(hit_indices)

    for offset, (name, idx) in enumerate(indices_info):
        if idx not in hit_indices:
            unhit_names.append(name)
            unhit_indices.append(idx)
            continue
        hit_names.append(name)
        new_hit_indices.append(idx)
        if len(new_hit_indices) == wanted:
            # All of the hit entries have been found, so everything after
            # this point is unhit and can be copied across in one step.
            rest = offset + 1
            unhit_names.extend(self._index_names[rest:])
            unhit_indices.extend(self._indices[rest:])
            break

    self._indices = new_hit_indices + unhit_indices
    self._index_names = hit_names + unhit_names
    if 'index' in debug.debug_flags:
        mutter('CombinedGraphIndex reordered: %r', self._indices)
    return hit_names
| 5086.7.2
by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. | 1462 | |
| 1463 | def _move_to_front_by_name(self, hit_names): | |
| 5086.7.3
by Andrew Bennetts Improve docstrings and refactor slightly for clarity. | 1464 | """Moves indices named by 'hit_names' to front of the search order, as | 
| 1465 |         described in _move_to_front.
 | |
| 1466 |         """
 | |
| 1467 |         # Translate names to index instances, and then call
 | |
| 1468 |         # _move_to_front_by_index.
 | |
| 5086.7.2
by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files. | 1469 | indices_info = zip(self._index_names, self._indices) | 
| 1470 | hit_indices = [] | |
| 1471 | for name, idx in indices_info: | |
| 1472 | if name in hit_names: | |
| 1473 | hit_indices.append(idx) | |
| 5086.7.3
by Andrew Bennetts Improve docstrings and refactor slightly for clarity. | 1474 | self._move_to_front_by_index(hit_indices) | 
| 2624.2.9
by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one. | 1475 | |
def find_ancestry(self, keys, ref_list_num):
    """Find the complete ancestry for the given set of keys.

    Note that this is a whole-ancestry request, so it should be used
    sparingly.

    Each backing index is asked, via its _find_ancestors method, to follow
    the ancestry as far as it can.  Keys an index could not supply are
    handed to the next index.  A key is only given up on once every index
    consulted in the same pass has reported it missing.

    :param keys: An iterable of keys to look for
    :param ref_list_num: The reference list which references the parents
        we care about.
    :return: (parent_map, missing_keys)
    """
    # XXX: make this call _move_to_front?
    missing_keys = set()
    parent_map = {}
    keys_to_lookup = set(keys)
    while keys_to_lookup:
        # Keys that *all* indexes claim are missing; stop searching them.
        all_index_missing = None
        for index in self._indices:
            # TODO: we should probably be doing something with
            #       'missing_keys' since we've already determined that
            #       those revisions have not been found anywhere
            index_missing_keys = set()
            # Find all of the ancestry we can from this index.  Keep
            # looking until the search_keys set is empty, which means
            # things we didn't find should be in index_missing_keys.
            # _find_ancestors is expected to fill in parent_map and
            # index_missing_keys in place and return the keys to search
            # for next.
            search_keys = keys_to_lookup
            while search_keys:
                search_keys = index._find_ancestors(
                    search_keys, ref_list_num, parent_map,
                    index_missing_keys)
            # Now set whatever was missing to be searched in the next index
            keys_to_lookup = index_missing_keys
            if all_index_missing is None:
                all_index_missing = set(index_missing_keys)
            else:
                all_index_missing.intersection_update(index_missing_keys)
            if not keys_to_lookup:
                break
        if all_index_missing is None:
            # There were no indexes, so all search keys are 'missing'
            missing_keys.update(keys_to_lookup)
            keys_to_lookup = None
        else:
            missing_keys.update(all_index_missing)
            # Whatever is left was found by an earlier index in this pass,
            # so it is searched for again in the next pass.
            keys_to_lookup.difference_update(all_index_missing)
    return parent_map, missing_keys
| 1539 | ||
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For CombinedGraphIndex this is approximated by the sum of the keys of
    the child indices. As child indices may have duplicate keys this can
    have a maximum error of the number of child indices * largest number of
    keys in any index.
    """
    while True:
        try:
            total = 0
            for backing_index in self._indices:
                total += backing_index.key_count()
            return total
        except errors.NoSuchFile:
            # A backing file vanished: reload (or re-raise) and recount.
            self._reload_or_raise()
| 1553 | ||
| 3830.3.12
by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks | 1554 | missing_keys = _missing_keys_from_parent_map | 
| 1555 | ||
| 3789.1.4
by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request. | 1556 | def _reload_or_raise(self): | 
| 1557 | """We just got a NoSuchFile exception. | |
| 1558 | ||
| 1559 |         Try to reload the indices, if it fails, just raise the current
 | |
| 1560 |         exception.
 | |
| 1561 |         """
 | |
| 1562 | if self._reload_func is None: | |
| 1563 |             raise
 | |
| 1564 | exc_type, exc_value, exc_traceback = sys.exc_info() | |
| 3789.1.10
by John Arbash Meinel Review comments from Martin. | 1565 | trace.mutter('Trying to reload after getting exception: %s', | 
| 1566 | exc_value) | |
| 3789.1.4
by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request. | 1567 | if not self._reload_func(): | 
| 1568 |             # We tried to reload, but nothing changed, so we fail anyway
 | |
| 3789.1.10
by John Arbash Meinel Review comments from Martin. | 1569 | trace.mutter('_reload_func indicated nothing has changed.' | 
| 1570 | ' Raising original exception.') | |
| 3789.1.4
by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request. | 1571 | raise exc_type, exc_value, exc_traceback | 
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 1572 | |
def set_sibling_indices(self, sibling_combined_graph_indices):
    """Set the CombinedGraphIndex objects to reorder after reordering self.

    :param sibling_combined_graph_indices: The objects that will have
        _move_to_front_by_name called on them whenever this object
        reorders itself.  The given object is stored as is, not copied.
    """
    siblings = sibling_combined_graph_indices
    self._sibling_indices = siblings
| 1577 | ||
def validate(self):
    """Validate that everything in the index can be accessed.

    Every backing index is validated in turn.
    """
    while True:
        try:
            for backing_index in self._indices:
                backing_index.validate()
        except errors.NoSuchFile:
            # A backing file vanished: reload (or re-raise) and validate
            # all of the indices again.
            self._reload_or_raise()
        else:
            return
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1587 | |
| 1588 | ||
| 1589 | class InMemoryGraphIndex(GraphIndexBuilder): | |
| 1590 | """A GraphIndex which operates entirely out of memory and is mutable. | |
| 1591 | ||
| 1592 |     This is designed to allow the accumulation of GraphIndex entries during a
 | |
| 1593 |     single write operation, where the accumulated entries need to be immediately
 | |
| 1594 |     available - for example via a CombinedGraphIndex.
 | |
| 1595 |     """
 | |
| 1596 | ||
def add_nodes(self, nodes):
    """Add nodes to the index.

    :param nodes: An iterable of entries to add.  Each entry is
        (key, value, node_refs) when this index has reference lists, and
        (key, value) when it has none.
    """
    if not self.reference_lists:
        for entry in nodes:
            key, value = entry
            self.add_node(key, value)
        return
    for entry in nodes:
        key, value, node_refs = entry
        self.add_node(key, value, node_refs)
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1608 | |
def iter_all_entries(self):
    """Iterate over all keys within the index

    Nodes flagged as absent are skipped.

    :return: An iterable of (index, key, value, reference_lists) when this
        index has reference lists, and of (index, key, value) when it has
        none. There is no defined order for the result iteration - it will
        be in the most efficient order for the index (in this case
        dictionary hash order).
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    with_references = self.reference_lists
    for key, (absent, references, value) in self._nodes.iteritems():
        if absent:
            continue
        if with_references:
            yield self, key, value, references
        else:
            yield self, key, value
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1627 | |
| 1628 | def iter_entries(self, keys): | |
| 1629 | """Iterate over keys within the index. | |
| 1630 | ||
| 1631 |         :param keys: An iterable providing the keys to be retrieved.
 | |
| 2979.2.4
by Robert Collins Docstring fixes from review. | 1632 |         :return: An iterable of (index, key, value, reference_lists). There is no
 | 
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1633 |             defined order for the result iteration - it will be in the most
 | 
| 1634 |             efficient order for the index (keys iteration order in this case).
 | |
| 1635 |         """
 | |
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 1636 |         # Note: See BTreeBuilder.iter_entries for an explanation of why we
 | 
| 1637 |         #       aren't using set().intersection() here
 | |
| 1638 | nodes = self._nodes | |
| 1639 | keys = [key for key in keys if key in nodes] | |
| 2592.1.46
by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method | 1640 | if self.reference_lists: | 
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 1641 | for key in keys: | 
| 1642 | node = nodes[key] | |
| 2592.1.46
by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method | 1643 | if not node[0]: | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 1644 | yield self, key, node[2], node[1] | 
| 2592.1.46
by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method | 1645 | else: | 
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 1646 | for key in keys: | 
| 1647 | node = nodes[key] | |
| 2592.1.46
by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method | 1648 | if not node[0]: | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 1649 | yield self, key, node[2] | 
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1650 | |
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1651 | def iter_entries_prefix(self, keys): | 
| 1652 | """Iterate over keys within the index using prefix matching. | |
| 1653 | ||
| 1654 |         Prefix matching is applied within the tuple of a key, not to within
 | |
| 1655 |         the bytestring of each key element. e.g. if you have the keys ('foo',
 | |
| 1656 |         'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
 | |
| 1657 |         only the former key is returned.
 | |
| 1658 | ||
| 1659 |         :param keys: An iterable providing the key prefixes to be retrieved.
 | |
| 1660 |             Each key prefix takes the form of a tuple the length of a key, but
 | |
| 1661 |             with the last N elements 'None' rather than a regular bytestring.
 | |
| 1662 |             The first element cannot be 'None'.
 | |
| 1663 |         :return: An iterable as per iter_all_entries, but restricted to the
 | |
| 1664 |             keys with a matching prefix to those supplied. No additional keys
 | |
| 1665 |             will be returned, and every match that is in the index will be
 | |
| 1666 |             returned.
 | |
| 1667 |         """
 | |
| 1668 |         # XXX: To much duplication with the GraphIndex class; consider finding
 | |
| 1669 |         # a good place to pull out the actual common logic.
 | |
| 1670 | keys = set(keys) | |
| 1671 | if not keys: | |
| 1672 |             return
 | |
| 1673 | if self._key_length == 1: | |
| 1674 | for key in keys: | |
| 1675 |                 # sanity check
 | |
| 1676 | if key[0] is None: | |
| 1677 | raise errors.BadIndexKey(key) | |
| 1678 | if len(key) != self._key_length: | |
| 1679 | raise errors.BadIndexKey(key) | |
| 1680 | node = self._nodes[key] | |
| 1681 | if node[0]: | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 1682 |                     continue
 | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1683 | if self.reference_lists: | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 1684 | yield self, key, node[2], node[1] | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1685 | else: | 
| 2624.2.17
by Robert Collins Review feedback. | 1686 | yield self, key, node[2] | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1687 |             return
 | 
| 3644.2.4
by John Arbash Meinel Change GraphIndex to also have a _get_nodes_by_key | 1688 | nodes_by_key = self._get_nodes_by_key() | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1689 | for key in keys: | 
| 1690 |             # sanity check
 | |
| 1691 | if key[0] is None: | |
| 1692 | raise errors.BadIndexKey(key) | |
| 1693 | if len(key) != self._key_length: | |
| 1694 | raise errors.BadIndexKey(key) | |
| 1695 |             # find what it refers to:
 | |
| 3644.2.4
by John Arbash Meinel Change GraphIndex to also have a _get_nodes_by_key | 1696 | key_dict = nodes_by_key | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1697 | elements = list(key) | 
| 1698 |             # find the subdict to return
 | |
| 1699 | try: | |
| 1700 | while len(elements) and elements[0] is not None: | |
| 1701 | key_dict = key_dict[elements[0]] | |
| 1702 | elements.pop(0) | |
| 1703 | except KeyError: | |
| 1704 |                 # a non-existant lookup.
 | |
| 1705 |                 continue
 | |
| 1706 | if len(elements): | |
| 1707 | dicts = [key_dict] | |
| 1708 | while dicts: | |
| 1709 | key_dict = dicts.pop(-1) | |
| 1710 |                     # can't be empty or would not exist
 | |
| 1711 | item, value = key_dict.iteritems().next() | |
| 1712 | if type(value) == dict: | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 1713 |                         # push keys
 | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1714 | dicts.extend(key_dict.itervalues()) | 
| 1715 | else: | |
| 1716 |                         # yield keys
 | |
| 1717 | for value in key_dict.itervalues(): | |
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 1718 | yield (self, ) + value | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1719 | else: | 
| 2624.2.14
by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data. | 1720 | yield (self, ) + key_dict | 
| 2624.2.10
by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex. | 1721 | |
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 1722 | def key_count(self): | 
| 1723 | """Return an estimate of the number of keys in this index. | |
| 3943.8.1
by Marius Kruger remove all trailing whitespace from bzr source | 1724 | |
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 1725 |         For InMemoryGraphIndex the estimate is exact.
 | 
| 1726 |         """
 | |
| 4789.28.2
by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute. | 1727 | return len(self._nodes) - len(self._absent_keys) | 
| 2624.2.16
by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index. | 1728 | |
| 2592.1.38
by Robert Collins Create an InMemoryGraphIndex for temporary indexing. | 1729 | def validate(self): | 
| 1730 | """In memory index's have no known corruption at the moment.""" | |
| 2624.2.12
by Robert Collins Create an adapter between indices with differing key lengths. | 1731 | |
| 1732 | ||
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: A tuple of key elements that is prepended to every key
            (and reference) sent to adapted and stripped from every result.
        :param missing_key_length: The number of None elements appended to
            prefix to build the prefix key used by iter_all_entries.
        :param add_nodes_callback: Optional callable that receives the list
            of translated nodes from add_nodes.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,)*missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        try:
            # Add prefix to each reference. node_refs is a tuple of tuples,
            # so split it apart, and add prefix to the internal reference
            translated_nodes = [
                (self.prefix + key, value,
                 tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                       for ref_list in node_refs))
                for (key, value, node_refs) in nodes]
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            # The list is rebuilt from scratch here so that entries translated
            # before the unpacking failure are not passed on twice.
            translated_nodes = [(self.prefix + key, value)
                                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of (index, key, value, reference_lists)
            or (index, key, value) entries from the adapted index.
        :return: A generator of the same entries with the prefix removed from
            the key and from every reference.
        :raises errors.BadIndexData: If a key or a reference does not start
            with the prefix.
        """
        for node in an_iter:
            # cross checks
            if node[1][:self.prefix_len] != self.prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][self.prefix_len:]
            if len(node) == 3:
                # Entry without reference lists: there is nothing else to
                # check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash order).
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count while iterating rather than building a throwaway list.
        return sum(1 for _ in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()