/brz/remove-bazaar : contents of breezy/index.py at revision 6656.2.5

: (revision 6656.2.5)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

5752.3.8 by John Arbash Meinel Merge bzr.dev 5764 to resolve release-notes (aka NEWS) conflicts	1	# Copyright (C) 2007-2011 Canonical Ltd
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	16
6379.6.7 by Jelmer Vernooij Move importing from future until after doc string, otherwise the doc string will disappear.	17	"""Indexing facilities."""
	18
6379.6.1 by Jelmer Vernooij Import absolute_import in a few places.	19	from __future__ import absolute_import
	20
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	21	__all__ = [
	22	'CombinedGraphIndex',
	23	'GraphIndex',
	24	'GraphIndexBuilder',
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	25	'GraphIndexPrefixAdapter',
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	26	'InMemoryGraphIndex',
	27	]
2592.1.32 by Robert Collins Add __all__ to index.	28
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	29	from bisect import bisect_right
2592.1.12 by Robert Collins Handle basic node adds.	30	import re
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	31	import sys
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	32
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	33	from .lazy_import import lazy_import
2624.2.15 by Robert Collins Add useful -Dindex flag.	34	lazy_import(globals(), """
6622.1.34 by Jelmer Vernooĳ Rename brzlib => breezy.	35	from breezy import (
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	36	bisect_multi,
	37	revision as _mod_revision,
	38	trace,
	39	)
2624.2.15 by Robert Collins Add useful -Dindex flag.	40	""")
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	41	from . import (
3099.3.3 by John Arbash Meinel Deprecate get_parents() in favor of get_parent_map()	42	debug,
	43	errors,
	44	)
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	45	from .sixish import (
6621.22.2 by Martin Use BytesIO or StringIO from bzrlib.sixish	46	BytesIO,
6654.1.1 by Martin Factor out some copycode in iter_entries_prefix implementations	47	viewvalues,
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	48	viewitems,
6621.22.2 by Martin Use BytesIO or StringIO from bzrlib.sixish	49	)
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	50	from .static_tuple import StaticTuple
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	51
2979.1.1 by Robert Collins Use the GraphIndex header to answer key_count queries rather than parsing the entire index unnecessarily.	52	_HEADER_READV = (0, 200)
2624.2.8 by Robert Collins Explicitly mark the number of keys elements in use in GraphIndex files.	53	_OPTION_KEY_ELEMENTS = "key_elements="
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	54	_OPTION_LEN = "len="
2592.1.6 by Robert Collins Record the number of node reference lists a particular index has.	55	_OPTION_NODE_REFS = "node_ref_lists="
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	56	_SIGNATURE = "Bazaar Graph Index 1\n"
	57
	58
2592.1.14 by Robert Collins Detect bad reference key values.	59	_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
2592.1.12 by Robert Collins Handle basic node adds.	60	_newline_null_re = re.compile('[\n\0]')
	61
	62
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	63	def _has_key_from_parent_map(self, key):
	64	"""Check if this index has one key.
	65
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	66	If it's possible to check for multiple keys at once through
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	67	calling get_parent_map that should be faster.
	68	"""
	69	return (key in self.get_parent_map([key]))
	70
3830.3.20 by John Arbash Meinel Minor PEP8 and copyright updates.	71
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	72	def _missing_keys_from_parent_map(self, keys):
	73	return set(keys) - set(self.get_parent_map(keys))
	74
	75
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS        := NODE_REF_LISTS KEY_ELEMENTS LEN
      NODE_REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE
      KEY_ELEMENTS   := 'key_elements=' DIGITS NEWLINE
      LEN            := 'len=' DIGITS NEWLINE
      NODES          := NODE*
      NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
      KEY_ELEMENT    := Not-whitespace-utf8
      ABSENT         := 'a'
      REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                                ; referenced key.
      VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """
        self.reference_lists = reference_lists
        # A dict of {key: (absent, ref_lists, value)}
        self._nodes = {}
        # Keys that are referenced but not actually present in this index
        self._absent_keys = set()
        # Lazily built nested-dict view of _nodes; see _get_nodes_by_key.
        self._nodes_by_key = None
        self._key_length = key_elements
        self._optimize_for_size = False
        self._combine_backing_indices = True

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index.

        A valid key is a tuple or StaticTuple (exact type, subclasses are
        rejected) with exactly self._key_length elements, none of which is
        empty or contains whitespace or a null character.
        """
        if type(key) not in (tuple, StaticTuple):
            raise errors.BadIndexKey(key)
        if self._key_length != len(key):
            raise errors.BadIndexKey(key)
        for element in key:
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def _external_references(self):
        """Return references that are not present in this index.

        :return: A set of keys found in the second reference list of some
            node but not themselves present as nodes. Always empty when the
            index has fewer than two reference lists.
        """
        # TODO: JAM 2008-11-21 This makes an assumption about how the reference
        #       lists are used. It is currently correct for pack-0.92 through
        #       1.9, which use the node references (3rd column) second
        #       reference list as the compression parent. Perhaps this should
        #       be moved into something higher up the stack, since it
        #       makes assumptions about how the index is used.
        if self.reference_lists <= 1:
            # If reference_lists == 0 there can be no external references, and
            # if reference_lists == 1, then there isn't a place to store the
            # compression parent
            return set()
        keys = set()
        refs = set()
        # iter_all_entries is not defined on this class in the code visible
        # here; it is expected to be supplied by a subclass.
        for node in self.iter_all_entries():
            keys.add(node[1])
            refs.update(node[3][1])
        return refs - keys

    def _get_nodes_by_key(self):
        """Return the nested-dict view of the present nodes, building it once.

        For a key of (foo, bar, baz) the view holds
        view[foo][bar][baz] = (key, value, references) when the index has
        reference lists, or (key, value) when it does not. Absent nodes are
        left out. The result is cached in self._nodes_by_key.
        """
        if self._nodes_by_key is None:
            nodes_by_key = {}
            with_references = bool(self.reference_lists)
            for key, (absent, references, value) in viewitems(self._nodes):
                if absent:
                    continue
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                if with_references:
                    key_dict[key[-1]] = key, value, references
                else:
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key

    def _update_nodes_by_key(self, key, value, node_refs):
        """Update the _nodes_by_key dict with a new key.

        For a key of (foo, bar, baz) create
        _nodes_by_key[foo][bar][baz] = key_value

        Does nothing when the nested view has not been built yet.
        """
        if self._nodes_by_key is None:
            return
        # Build the stored value before touching the dict so that a failure
        # here leaves the nested view unmodified.
        if self.reference_lists:
            key_value = StaticTuple(key, value, node_refs)
        else:
            key_value = StaticTuple(key, value)
        key_dict = self._nodes_by_key
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = key_value

    def _check_key_ref_value(self, key, references, value):
        """Check that 'key' and 'references' are all valid.

        :param key: A key tuple. Must conform to the key interface (be a tuple,
            be of the right length, not have any whitespace or nulls in any key
            element.)
        :param references: A sized collection of reference lists (len() is
            taken of it). Something like
            [[(ref, key)], [(ref, key), (other, key)]]
        :param value: The value associate with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              iterables are tuples
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        :raises BadIndexKey: if key or a not-yet-known reference is invalid.
        :raises BadIndexValue: if value contains a newline or null, or the
            number of reference lists does not match this index.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
            for reference in reference_list:
                # If reference *is* in self._nodes, then we know it has already
                # been checked.
                if reference not in self._nodes:
                    self._check_key(reference)
                    absent_references.append(reference)
            reference_list = as_st([as_st(ref).intern()
                                    for ref in reference_list])
            node_refs.append(reference_list)
        return as_st(node_refs), absent_references

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises BadIndexDuplicateKey: if key was already added as a present
            (non-absent) node.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = ('a', (), '')
        self._absent_keys.update(absent_references)
        # The key is being added for real now, so it is no longer absent.
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        This is a no-op, but we need the api to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        """Finish the index.

        :returns: A BytesIO holding the full content of the index as it
            should be written to disk.
        :raises BzrError: if reference lists are in use and the serialised
            length differs from the length computed beforehand.
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # file cannot be determined until all the inbetween references have
        # been calculated too) we pad the offsets with 0's to make them be
        # of consistent length. Using binary offsets would break the trivial
        # file parsing.
        # to calculate the width of zero's needed we do three passes:
        # one to gather all the non-reference data and the number of references.
        # one to pad all the data with reference-length and determine entry
        # addresses.
        # One to serialise.

        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        nodes = sorted(viewitems(self._nodes))
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # record the offset known so far for this key:
                # the non reference bytes to date, and the total references to
                # date - saves reaccumulating on the second pass
                key_offset_info.append((key, non_ref_bytes, total_references))
                # key is literal, value is literal, there are 3 null's, 1 NL
                # key is variable length tuple, \x00 between elements
                non_ref_bytes += sum(len(element) for element in key)
                if self._key_length > 1:
                    non_ref_bytes += self._key_length - 1
                # value is literal bytes, there are 3 null's, 1 NL.
                non_ref_bytes += len(value) + 3 + 1
                if absent:
                    # one byte for absent if set.
                    non_ref_bytes += 1
                else:
                    # (ref_lists -1) tabs
                    non_ref_bytes += self.reference_lists - 1
                    # (ref-1 cr's per ref_list)
                    for ref_list in references:
                        # how many references across the whole file?
                        total_references += len(ref_list)
                        # accrue reference separators
                        if ref_list:
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1  # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, bytes_before, refs_before in key_offset_info:
                key_addresses[key] = bytes_before + refs_before*digits
            # zero-padded, fixed-width formatter for reference offsets
            format_string = '%%0%sd' % digits
        # serialise
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(
                        format_string % key_addresses[reference])
                flattened_references.append('\r'.join(ref_addresses))
            string_key = '\x00'.join(key)
            lines.append("%s\x00%s\x00%s\x00%s\n" % (
                string_key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        result = BytesIO(''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(result.getvalue()), expected_bytes))
        return result

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        :param for_size: Tell the builder to try and make the index as small as
            possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        :return: None

        A parameter left as None keeps its current setting.
        """
        # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
        # other builders do.
        if for_size is not None:
            self._optimize_for_size = for_size
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()

        :param keys: The keys to start walking from.
        :param ref_list_num: Which reference list of each node holds the
            parent keys to follow.
        :return: (parent_map, missing_keys) where parent_map maps each key
            found to its parent keys and missing_keys is the set of
            requested or referenced keys that were not found.
        """
        pending = set(keys)
        parent_map = {}
        missing_keys = set()
        while pending:
            next_pending = set()
            # iter_entries is not defined on this class in the code visible
            # here; it is expected to be supplied by a subclass.
            for _, key, value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                parent_map[key] = parent_keys
                next_pending.update([p for p in parent_keys if p not in
                                     parent_map])
                # NOTE(review): nesting of this line is reconstructed from a
                # view that lost indentation -- confirm against upstream.
                missing_keys.update(pending.difference(parent_map))
            pending = next_pending
        return parent_map, missing_keys
	378
2592.1.5 by Robert Collins Trivial index reading.	379
	380	class GraphIndex(object):
	381	"""An index for data with embedded graphs.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	382
2592.1.10 by Robert Collins Make validate detect node reference parsing errors.	383	The index maps keys to a list of key reference lists, and a value.
	384	Each node has the same number of key reference lists. Each key reference
	385	list can be empty or an arbitrary length. The value is an opaque NULL
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	386	terminated string without any newlines. The storage of the index is
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	387	hidden in the interface: keys and key references are always tuples of
	388	bytestrings, never the internal representation (e.g. dictionary offsets).
2592.1.30 by Robert Collins Absent entries are not yeilded.	389
2592.1.30 by Robert Collins Absent entries are not yeilded.	390	It is presumed that the index will not be mutated - it is static data.
2592.1.34 by Robert Collins Cleanup docs.	391
2592.1.44 by Robert Collins Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.	392	Successive iter_all_entries calls will read the entire index each time.
	393	Additionally, iter_entries calls will read the index linearly until the
	394	desired keys are found. XXX: This must be fixed before the index is
2592.1.34 by Robert Collins Cleanup docs.	395	suitable for production use. :XXX
2592.1.5 by Robert Collins Trivial index reading.	396	"""
2592.1.5 by Robert Collins Trivial index reading.	397
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	398	def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
2592.1.5 by Robert Collins Trivial index reading.	399	"""Open an index called name on transport.
2592.1.5 by Robert Collins Trivial index reading.	400
6622.1.34 by Jelmer Vernooĳ Rename brzlib => breezy.	401	:param transport: A breezy.transport.Transport.
2592.1.5 by Robert Collins Trivial index reading.	402	:param name: A path to provide to transport API calls.
2890.2.1 by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the	403	:param size: The size of the index in bytes. This is used for bisection
	404	logic to perform partial index reads. While the size could be
	405	obtained by statting the file this introduced an additional round
2890.2.8 by Robert Collins Make the size of the index optionally None for the pack-names index.	406	trip as well as requiring stat'able transports, both of which are
	407	avoided by having it supplied. If size is None, then bisection
	408	support will be disabled and accessing the index will just stream
	409	all the data.
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	410	:param offset: Instead of starting the index data at offset 0, start it
	411	at an arbitrary offset.
2592.1.5 by Robert Collins Trivial index reading.	412	"""
	413	self._transport = transport
	414	self._name = name
2890.2.16 by Robert Collins Review feedback.	415	# Becomes a dict of key:(value, reference-list-byte-locations) used by
2890.2.16 by Robert Collins Review feedback.	416	# the bisection interface to store parsed but not resolved keys.
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	417	self._bisect_nodes = None
2890.2.16 by Robert Collins Review feedback.	418	# Becomes a dict of key:(value, reference-list-keys) which are ready to
2890.2.16 by Robert Collins Review feedback.	419	# be returned directly to callers.
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	420	self._nodes = None
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	421	# a sorted list of slice-addresses for the parsed bytes of the file.
	422	# e.g. (0,1) would mean that byte 0 is parsed.
2890.2.2 by Robert Collins Opening an index creates a map for the parsed bytes.	423	self._parsed_byte_map = []
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	424	# a sorted list of keys matching each slice address for parsed bytes
	425	# e.g. (None, 'foo@bar') would mean that the first byte contained no
	426	# key, and the end byte of the slice is the of the data for 'foo@bar'
	427	self._parsed_key_map = []
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	428	self._key_count = None
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	429	self._keys_by_offset = None
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	430	self._nodes_by_key = None
2890.2.1 by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the	431	self._size = size
3665.3.3 by John Arbash Meinel If we read more than 50% of the whole index,	432	# The number of bytes we've read so far in trying to process this file
	433	self._bytes_read = 0
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	434	self._base_offset = offset
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	435
2592.3.176 by Robert Collins Various pack refactorings.	436	def __eq__(self, other):
2592.3.215 by Robert Collins Review feedback.	437	"""Equal when self and other were created with the same parameters."""
2592.3.176 by Robert Collins Various pack refactorings.	438	return (
6619.3.18 by Jelmer Vernooĳ Run 2to3 idioms fixer.	439	isinstance(self, type(other)) and
2592.3.176 by Robert Collins Various pack refactorings.	440	self._transport == other._transport and
	441	self._name == other._name and
	442	self._size == other._size)
	443
	444	def __ne__(self, other):
	445	return not self.__eq__(other)
	446
3517.4.13 by Martin Pool Add repr methods	447	def __repr__(self):
	448	return "%s(%r)" % (self.__class__.__name__,
	449	self._transport.abspath(self._name))
	450
def _buffer_all(self, stream=None):
    """Buffer all the index data.

    Mutates self._nodes and self._keys_by_offset.

    :param stream: An optional file-like object holding the index content.
        When None the content is fetched with
        self._transport.get(self._name).  The stream is always closed
        before this method returns or raises.
    :raises errors.BadIndexData: If the parsed content does not contain
        exactly one trailer line.
    """
    if self._nodes is not None:
        # We already did this
        return
    if 'index' in debug.debug_flags:
        trace.mutter('Reading entire index %s',
                     self._transport.abspath(self._name))
    if stream is None:
        stream = self._transport.get(self._name)
    try:
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            source = stream
            try:
                stream = BytesIO(source.read()[self._base_offset:])
            finally:
                # The in-memory copy replaces the source stream, so release
                # the source here; nothing else would close it.
                source.close()
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        pos = stream.tell()
        lines = stream.read().split('\n')
    finally:
        # Close even when reading or header parsing fails.
        stream.close()
    # Splitting on the final newline leaves an empty last element.
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in viewvalues(self._keys_by_offset):
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)
	496
def clear_cache(self):
    """Drop any cached/memoized values held by this index.

    Callers may invoke this at any time; it is typically used once the
    needed information has been extracted and no further requests to this
    index are expected.  This implementation does nothing.
    """
    return None
	504
def external_references(self, ref_list_num):
    """Return references that are not present in this index.

    The whole index is buffered first.

    :param ref_list_num: Which reference list of each node to inspect.
    :return: A set of the referenced keys that are not keys of this index.
    :raises ValueError: If ref_list_num + 1 exceeds the number of
        reference lists the index has.
    """
    self._buffer_all()
    available = self.node_ref_lists
    if ref_list_num + 1 > available:
        raise ValueError('No ref list %d, index has %d ref lists'
            % (ref_list_num, available))
    known = self._nodes
    external = set()
    for _key, (_value, ref_lists) in viewitems(known):
        external.update(
            ref for ref in ref_lists[ref_list_num] if ref not in known)
    return external
4011.5.2 by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references()	518
3711.3.21 by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand.	519	def _get_nodes_by_key(self):
	520	if self._nodes_by_key is None:
	521	nodes_by_key = {}
	522	if self.node_ref_lists:
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	523	for key, (value, references) in viewitems(self._nodes):
3711.3.21 by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand.	524	key_dict = nodes_by_key
	525	for subkey in key[:-1]:
	526	key_dict = key_dict.setdefault(subkey, {})
	527	key_dict[key[-1]] = key, value, references
	528	else:
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	529	for key, value in viewitems(self._nodes):
3711.3.21 by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand.	530	key_dict = nodes_by_key
	531	for subkey in key[:-1]:
	532	key_dict = key_dict.setdefault(subkey, {})
	533	key_dict[key[-1]] = key, value
	534	self._nodes_by_key = nodes_by_key
	535	return self._nodes_by_key
	536
def iter_all_entries(self):
    """Iterate over every entry in the index.

    The whole index is buffered on first use.

    :return: An iterable of (index, key, value, reference_lists) when the
        index has reference lists, otherwise of (index, key, value), which
        keeps the API compatible with simple key:value index types.
        No ordering of the results is defined.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if not self.node_ref_lists:
        for key, value in viewitems(self._nodes):
            yield self, key, value
        return
    for key, (value, node_ref_lists) in viewitems(self._nodes):
        yield self, key, value, node_ref_lists
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	557
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Consumes the signature and then three option lines, storing their
    integer values in self.node_ref_lists, self._key_length and
    self._key_count, in that order.

    :param stream: A file-like object positioned at the start of the index.
    :raises errors.BadIndexFormatSignature: If the signature does not match.
    :raises errors.BadIndexOptions: If an option line lacks its expected
        prefix or its value is not an integer.
    """
    expected_signature = self._signature()
    if stream.read(len(expected_signature)) != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    header_options = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for prefix, attribute in header_options:
        line = stream.readline()
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            # The final character (the line terminator) is dropped.
            number = int(line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, number)
2592.1.5 by Robert Collins Trivial index reading.	583
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	584	def _resolve_references(self, references):
2890.2.16 by Robert Collins Review feedback.	585	"""Return the resolved key references for references.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	586
2890.2.16 by Robert Collins Review feedback.	587	References are resolved by looking up the location of the key in the
	588	_keys_by_offset map and substituting the key name, preserving ordering.
	589
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	590	:param references: An iterable of iterables of key locations. e.g.
2890.2.16 by Robert Collins Review feedback.	591	[[123, 456], [123]]
	592	:return: A tuple of tuples of keys.
	593	"""
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	594	node_refs = []
	595	for ref_list in references:
	596	node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
	597	return tuple(node_refs)
	598
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	599	def _find_index(self, range_map, key):
	600	"""Helper for the _parsed_*_index calls.
	601
	602	Given a range map - [(start, end), ...], finds the index of the range
	603	in the map for key if it is in the map, and if it is not there, the
	604	immediately preceeding range in the map.
	605	"""
	606	result = bisect_right(range_map, key) - 1
	607	if result + 1 < len(range_map):
	608	# check the border condition, it may be in result + 1
	609	if range_map[result + 1][0] == key[0]:
	610	return result + 1
	611	return result
	612
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	613	def _parsed_byte_index(self, offset):
	614	"""Return the index of the entry immediately before offset.
	615
	616	e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
	617	there is one unparsed byte (the 11th, addressed as[10]). then:
	618	asking for 0 will return 0
	619	asking for 10 will return 0
	620	asking for 11 will return 1
	621	asking for 12 will return 1
	622	"""
	623	key = (offset, 0)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	624	return self._find_index(self._parsed_byte_map, key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	625
	626	def _parsed_key_index(self, key):
	627	"""Return the index of the entry immediately before key.
	628
	629	e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
	630	meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
	631	have been parsed, then:
	632	asking for '' will return 0
	633	asking for 'a' will return 0
	634	asking for 'b' will return 1
	635	asking for 'e' will return 1
	636	"""
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	637	search_key = (key, None)
	638	return self._find_index(self._parsed_key_map, search_key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	639
	640	def _is_parsed(self, offset):
	641	"""Returns True if offset has been parsed."""
	642	index = self._parsed_byte_index(offset)
	643	if index == len(self._parsed_byte_map):
	644	return offset < self._parsed_byte_map[index - 1][1]
	645	start, end = self._parsed_byte_map[index]
	646	return offset >= start and offset < end
	647
2890.2.7 by Robert Collins * Pack indices are now partially parsed for specific key lookup using a	648	def _iter_entries_from_total_buffer(self, keys):
	649	"""Iterate over keys when the entire index is parsed."""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	650	# Note: See the note in BTreeBuilder.iter_entries for why we don't use
	651	# .intersection() here
	652	nodes = self._nodes
	653	keys = [key for key in keys if key in nodes]
2624.2.3 by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans.	654	if self.node_ref_lists:
	655	for key in keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	656	value, node_refs = nodes[key]
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	657	yield self, key, value, node_refs
2624.2.3 by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans.	658	else:
	659	for key in keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	660	yield self, key, nodes[key]
2592.1.7 by Robert Collins A validate that goes boom.	661
def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    wanted = set(keys)
    if not wanted:
        return []
    # Without a known size bisection is impossible: read everything.
    if self._nodes is None and self._size is None:
        self._buffer_all()

    # We fit about 20 keys per minimum-read (4K), so if we are looking for
    # more than 1/20th of the index its likely (assuming homogenous key
    # spread) that we'll read the entire index. If we're going to do that,
    # buffer the whole thing. A better analysis might take key spread into
    # account - but B+Tree indices are better anyway.
    # We could look at all data read, and use a threshold there, which will
    # trigger on ancestry walks, but that is not yet fully mapped out.
    if self._nodes is None and len(wanted) * 20 > self.key_count():
        self._buffer_all()
    if self._nodes is None:
        # Not buffered: bisect the file for just the requested keys.
        probes = bisect_multi.bisect_multi_bytes(
            self._lookup_keys_via_location, self._size, wanted)
        return (probe[1] for probe in probes)
    return self._iter_entries_from_total_buffer(wanted)
2890.2.7 by Robert Collins * Pack indices are now partially parsed for specific key lookup using a	690
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = set(keys)
    if not prefixes:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    if self._key_length != 1:
        # Multi-element keys: match against the nested by-key mapping.
        for entry in _iter_entries_prefix(
                self, self._get_nodes_by_key(), prefixes):
            yield entry
        return
    # Single-element keys: a prefix is a complete key, so look it up
    # directly.
    # NOTE(review): a key missing from self._nodes raises KeyError here
    # rather than being skipped - confirm whether callers rely on that.
    for prefix in prefixes:
        _sanity_check_key(self, prefix)
        if self.node_ref_lists:
            value, node_refs = self._nodes[prefix]
            yield self, prefix, value, node_refs
        else:
            yield self, prefix, self._nodes[prefix]
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	731
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	732	def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
	733	"""See BTreeIndex._find_ancestors."""
4593.4.7 by John Arbash Meinel Basic implementation of a conforming interface for GraphIndex.	734	# The api can be implemented as a trivial overlay on top of
	735	# iter_entries, it is not an efficient implementation, but it at least
	736	# gets the job done.
	737	found_keys = set()
	738	search_keys = set()
	739	for index, key, value, refs in self.iter_entries(keys):
	740	parent_keys = refs[ref_list_num]
	741	found_keys.add(key)
	742	parent_map[key] = parent_keys
	743	search_keys.update(parent_keys)
	744	# Figure out what, if anything, was missing
	745	missing_keys.update(set(keys).difference(found_keys))
	746	search_keys = search_keys.difference(parent_map)
	747	return search_keys
	748
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.  When the count is not yet
    known, the header range is read and parsed to obtain it.
    """
    if self._key_count is not None:
        return self._key_count
    self._read_and_parse([_HEADER_READV])
    return self._key_count
	757
def _lookup_keys_via_location(self, location_keys):
    """Bisection callback: answer key lookups at probed byte locations.

    This is the callable handed to bisect_multi.bisect_multi_bytes by
    iter_entries.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        breezy.bisect_multi.bisect_multi_bytes.  Each result is one of:
        False when the key is known to be absent; -1 or +1 when the key
        lies before or after the region parsed at the probed location;
        or the entry itself, (self, key, value) or
        (self, key, value, references).
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # this progresses in three parts:
    # read data
    # parse it
    # attempt to answer the question from the now in memory data.
    # build the readv request
    # for each location, ask for 800 bytes - much more than rows we've seen
    # anywhere.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key has been parsed, so no lookup is needed even if its
            # not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        length = 800
        # never request bytes past the end of the index
        if location + length > self._size:
            length = self._size - location
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append(_HEADER_READV)
    self._read_and_parse(readv_ranges)
    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range
        for location, key in location_keys:
            if key not in self._nodes: # not present
                result.append(((location, key), False))
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                result.append(((location, key),
                    (self, key, value, refs)))
            else:
                result.append(((location, key),
                    (self, key, self._nodes[key])))
        return result
    # generate results:
    #  - figure out <, >, missing, present
    #  - resolve present references so we can return them.
    # keys that we cannot answer until we resolve references
    pending_references = []
    # byte locations of referenced keys that have not been parsed yet
    pending_locations = set()
    for location, key in location_keys:
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if self.node_ref_lists:
                # the references may not have been all parsed.
                value, refs = self._bisect_nodes[key]
                wanted_locations = []
                for ref_list in refs:
                    for ref in ref_list:
                        if ref not in self._keys_by_offset:
                            wanted_locations.append(ref)
                if wanted_locations:
                    # defer this key until the referenced locations have
                    # been read and parsed below.
                    pending_locations.update(wanted_locations)
                    pending_references.append((location, key))
                    continue
                result.append(((location, key), (self, key,
                    value, self._resolve_references(refs))))
            else:
                result.append(((location, key),
                    (self, key, self._bisect_nodes[key])))
            continue
        else:
            # has the region the key should be in, been parsed?
            index = self._parsed_key_index(key)
            if (self._parsed_key_map[index][0] <= key and
                (self._parsed_key_map[index][1] >= key or
                 # end of the file has been parsed
                 self._parsed_byte_map[index][1] == self._size)):
                # the region is parsed and the key was not found in it, so
                # the key is absent.
                result.append(((location, key), False))
                continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append(((location, key), direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = 800
        if location + length > self._size:
            length = self._size - location
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in pending_references:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result
	899
	900	def _parse_header_from_bytes(self, bytes):
	901	"""Parse the header from a region of bytes.
	902
	903	:param bytes: The data to parse.
	904	:return: An offset, data tuple such as readv yields, for the unparsed
	905	data. (which may length 0).
	906	"""
	907	signature = bytes[0:len(self._signature())]
	908	if not signature == self._signature():
	909	raise errors.BadIndexFormatSignature(self._name, GraphIndex)
	910	lines = bytes[len(self._signature()):].splitlines()
	911	options_line = lines[0]
	912	if not options_line.startswith(_OPTION_NODE_REFS):
	913	raise errors.BadIndexOptions(self)
	914	try:
	915	self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
	916	except ValueError:
	917	raise errors.BadIndexOptions(self)
	918	options_line = lines[1]
	919	if not options_line.startswith(_OPTION_KEY_ELEMENTS):
	920	raise errors.BadIndexOptions(self)
	921	try:
	922	self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
	923	except ValueError:
	924	raise errors.BadIndexOptions(self)
	925	options_line = lines[2]
	926	if not options_line.startswith(_OPTION_LEN):
	927	raise errors.BadIndexOptions(self)
	928	try:
	929	self._key_count = int(options_line[len(_OPTION_LEN):])
	930	except ValueError:
	931	raise errors.BadIndexOptions(self)
	932	# calculate the bytes we have processed
	933	header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
	934	len(lines[2]) + 3)
	935	self._parsed_bytes(0, None, header_end, None)
	936	# setup parsing state
	937	self._expected_elements = 3 + self._key_length
	938	# raw data keyed by offset
	939	self._keys_by_offset = {}
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	940	# keys with the value and node references
	941	self._bisect_nodes = {}
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	942	return header_end, bytes[header_end:]
	943
	944	def _parse_region(self, offset, data):
	945	"""Parse node data returned from a readv operation.
	946
	947	:param offset: The byte offset the data starts at.
	948	:param data: The data to parse.
	949	"""
	950	# trim the data.
	951	# end first:
	952	end = offset + len(data)
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	953	high_parsed = offset
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	954	while True:
	955	# Trivial test - if the current index's end is within the
	956	# low-matching parsed range, we're done.
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	957	index = self._parsed_byte_index(high_parsed)
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	958	if end < self._parsed_byte_map[index][1]:
	959	return
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	960	# print "[%d:%d]" % (offset, end), \
	961	# self._parsed_byte_map[index:index + 2]
	962	high_parsed, last_segment = self._parse_segment(
	963	offset, data, end, index)
	964	if last_segment:
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	965	return
	966
def _parse_segment(self, offset, data, end, index):
    """Parse one segment of data.

    A segment is the part of ``data`` that lies between the parsed region
    at ``index`` and the next parsed region (if any). The data is trimmed
    so that only whole, not-yet-parsed lines remain; those lines are parsed,
    their nodes are stored in ``self._bisect_nodes`` and the byte range is
    recorded as parsed.

    :param offset: Where 'data' begins in the file.
    :param data: Some data to parse a segment of.
    :param end: Where data ends
    :param index: The current index into the parsed bytes map.
    :return: high_parsed_byte, last_segment.
        high_parsed_byte is the location of the highest parsed byte in this
        segment, last_segment is True if the parsed segment is the last
        possible one in the data block.
    :raises AssertionError: If a required newline is missing from the data
        or if nothing is left to parse after trimming.
    """
    # trim_start and trim_end are slice bounds into data; None means "do
    # not trim that side".
    # default is to use all data
    trim_end = None
    # accommodate overlap with data before this.
    if offset < self._parsed_byte_map[index][1]:
        # overlaps the lower parsed region
        # skip the parsed data
        trim_start = self._parsed_byte_map[index][1] - offset
        # don't trim the start for \n
        start_adjacent = True
    elif offset == self._parsed_byte_map[index][1]:
        # abuts the lower parsed region
        # use all data
        trim_start = None
        # do not trim anything
        start_adjacent = True
    else:
        # does not overlap the lower parsed region
        # use all data
        trim_start = None
        # but trim the leading \n
        start_adjacent = False
    if end == self._size:
        # lines up to the end of all data:
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif index + 1 == len(self._parsed_byte_map):
        # at the end of the parsed data
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    elif end == self._parsed_byte_map[index + 1][0]:
        # abuts the next parsed region
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif end > self._parsed_byte_map[index + 1][0]:
        # overlaps into the next parsed region
        # only consider the unparsed data
        trim_end = self._parsed_byte_map[index + 1][0] - offset
        # do not strip to the last \n as we know its an entire record
        end_adjacent = True
        # if the data runs past the end of the next parsed region there is
        # another segment to parse after this one.
        last_segment = end < self._parsed_byte_map[index + 1][1]
    else:
        # does not overlap into the next region
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    # now find bytes to discard if needed
    if not start_adjacent:
        # the data starts part-way through a line: skip up to and including
        # the first \n.
        # work around python bug in rfind
        if trim_start is None:
            trim_start = data.find('\n') + 1
        else:
            trim_start = data.find('\n', trim_start) + 1
        # find() returns -1 when there is no \n, which the + 1 turns into 0.
        if not (trim_start != 0):
            raise AssertionError('no \n was present')
        # print 'removing start', offset, trim_start, repr(data[:trim_start])
    if not end_adjacent:
        # the data may stop part-way through a line: keep only the bytes up
        # to and including the last \n.
        # work around python bug in rfind
        if trim_end is None:
            trim_end = data.rfind('\n') + 1
        else:
            trim_end = data.rfind('\n', None, trim_end) + 1
        # rfind() returns -1 when there is no \n, which the + 1 turns into 0.
        if not (trim_end != 0):
            raise AssertionError('no \n was present')
        # print 'removing end', offset, trim_end, repr(data[trim_end:])
    # adjust offset and data to the parseable data.
    trimmed_data = data[trim_start:trim_end]
    if not (trimmed_data):
        raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
            % (trim_start, trim_end, offset, offset + len(data)))
    if trim_start:
        offset += trim_start
    # print "parsing", repr(trimmed_data)
    # splitlines mangles the \r delimiters.. don't use it.
    lines = trimmed_data.split('\n')
    # drop the element that split() leaves after the final \n.
    del lines[-1]
    pos = offset
    first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
    for key, value in nodes:
        self._bisect_nodes[key] = value
    # record the byte range and the key range that are now parsed.
    self._parsed_bytes(offset, first_key,
        offset + len(trimmed_data), last_key)
    return offset + len(trimmed_data), last_segment
	1074
	1075	def _parse_lines(self, lines, pos):
	1076	key = None
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1077	first_key = None
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1078	trailers = 0
	1079	nodes = []
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1080	for line in lines:
	1081	if line == '':
	1082	# must be at the end
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1083	if self._size:
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1084	if not (self._size == pos + 1):
	1085	raise AssertionError("%s %s" % (self._size, pos))
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1086	trailers += 1
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1087	continue
	1088	elements = line.split('\0')
	1089	if len(elements) != self._expected_elements:
	1090	raise errors.BadIndexData(self)
3530.3.3 by Robert Collins Credit and explanation for interning.	1091	# keys are tuples. Each element is a string that may occur many
	1092	# times, so we intern them to save space. AB, RC, 200807
3711.3.13 by John Arbash Meinel Shave off another 5s by not building 'node_by_key'	1093	key = tuple([intern(element) for element in elements[:self._key_length]])
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1094	if first_key is None:
	1095	first_key = key
	1096	absent, references, value = elements[-3:]
	1097	ref_lists = []
	1098	for ref_string in references.split('\t'):
	1099	ref_lists.append(tuple([
	1100	int(ref) for ref in ref_string.split('\r') if ref
	1101	]))
	1102	ref_lists = tuple(ref_lists)
	1103	self._keys_by_offset[pos] = (key, absent, ref_lists, value)
	1104	pos += len(line) + 1 # +1 for the \n
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1105	if absent:
	1106	continue
	1107	if self.node_ref_lists:
	1108	node_value = (value, ref_lists)
	1109	else:
	1110	node_value = value
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1111	nodes.append((key, node_value))
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1112	# print "parsed ", key
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1113	return first_key, key, nodes, trailers
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1114
	1115	def _parsed_bytes(self, start, start_key, end, end_key):
	1116	"""Mark the bytes from start to end as parsed.
	1117
	1118	Calling self._parsed_bytes(1,2) will mark one byte (the one at offset
	1119	1) as parsed.
	1120
	1121	:param start: The start of the parsed region.
	1122	:param end: The end of the parsed region.
	1123	"""
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1124	index = self._parsed_byte_index(start)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1125	new_value = (start, end)
	1126	new_key = (start_key, end_key)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1127	if index == -1:
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1128	# first range parsed is always the beginning.
	1129	self._parsed_byte_map.insert(index, new_value)
	1130	self._parsed_key_map.insert(index, new_key)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1131	return
	1132	# four cases:
	1133	# new region
	1134	# extend lower region
	1135	# extend higher region
	1136	# combine two regions
	1137	if (index + 1 < len(self._parsed_byte_map) and
	1138	self._parsed_byte_map[index][1] == start and
	1139	self._parsed_byte_map[index + 1][0] == end):
	1140	# combine two regions
	1141	self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
	1142	self._parsed_byte_map[index + 1][1])
	1143	self._parsed_key_map[index] = (self._parsed_key_map[index][0],
	1144	self._parsed_key_map[index + 1][1])
2890.2.12 by Robert Collins More index tweaks.	1145	del self._parsed_byte_map[index + 1]
2890.2.12 by Robert Collins More index tweaks.	1146	del self._parsed_key_map[index + 1]
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1147	elif self._parsed_byte_map[index][1] == start:
	1148	# extend the lower entry
	1149	self._parsed_byte_map[index] = (
	1150	self._parsed_byte_map[index][0], end)
	1151	self._parsed_key_map[index] = (
	1152	self._parsed_key_map[index][0], end_key)
	1153	elif (index + 1 < len(self._parsed_byte_map) and
	1154	self._parsed_byte_map[index + 1][0] == end):
	1155	# extend the higher entry
	1156	self._parsed_byte_map[index + 1] = (
	1157	start, self._parsed_byte_map[index + 1][1])
	1158	self._parsed_key_map[index + 1] = (
	1159	start_key, self._parsed_key_map[index + 1][1])
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1160	else:
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1161	# new entry
	1162	self._parsed_byte_map.insert(index + 1, new_value)
	1163	self._parsed_key_map.insert(index + 1, new_key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1164
def _read_and_parse(self, readv_ranges):
    """Read the ranges and parse the resulting data.

    Nothing is read when there are no ranges. If at least half of the index
    has already been read and the index is not yet fully buffered, the
    whole index is buffered instead of issuing another partial read.

    :param readv_ranges: A prepared readv range list of (start, size)
        pairs, with start relative to the beginning of this index.
    """
    if not readv_ranges:
        return
    if self._nodes is None and self._bytes_read * 2 >= self._size:
        # We've already read more than 50% of the file and we are about to
        # request more data, just _buffer_all() and be done
        self._buffer_all()
        return

    shift = self._base_offset
    if shift != 0:
        # Rewrite the ranges for the offset
        readv_ranges = [(start + shift, size)
                        for start, size in readv_ranges]
    upper_limit = self._size + self._base_offset
    chunks = self._transport.readv(self._name, readv_ranges, True,
                                   upper_limit)
    for offset, data in chunks:
        index_offset = offset - shift
        self._bytes_read += len(data)
        if index_offset < 0:
            # transport.readv() expanded to extra data which isn't part of
            # this index
            data = data[-index_offset:]
            index_offset = 0
        if index_offset == 0 and len(data) == self._size:
            # We read the whole range, most likely because the
            # Transport upcast our readv ranges into one long request
            # for enough total data to grab the whole index.
            self._buffer_all(BytesIO(data))
            return
        if self._bisect_nodes is None:
            # Nothing has been parsed yet, so this must be the start.
            if index_offset != 0:
                raise AssertionError()
            index_offset, data = self._parse_header_from_bytes(data)
        self._parse_region(index_offset, data)
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1207
def _signature(self):
    """The file signature for this index type.

    :return: The module-level _SIGNATURE constant.
    """
    return _SIGNATURE
	1211
def validate(self):
    """Validate that everything in the index can be accessed.

    Any problem surfaces as whatever exception iter_all_entries() raises
    while it is being consumed.
    """
    # iter_all validates completely at the moment, so exhausting it is all
    # that is needed; the entries themselves are discarded.
    entries = iter(self.iter_all_entries())
    for _unused in entries:
        continue
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1217
	1218
	1219	class CombinedGraphIndex(object):
	1220	"""A GraphIndex made up from smaller GraphIndices.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1221
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1222	The backing indices must implement GraphIndex, and are presumed to be
	1223	static data.
2592.1.45 by Robert Collins Tweak documentation as per Aaron's review.	1224
	1225	Queries against the combined index will be made against the first index,
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1226	and then the second and so on. The order of indices can thus influence
2592.1.45 by Robert Collins Tweak documentation as per Aaron's review.	1227	performance significantly. For example, if one index is on local disk and a
	1228	second on a remote server, the local disk index should be before the other
	1229	in the index list.
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1230
	1231	Also, queries tend to need results from the same indices as previous
	1232	queries. So the indices will be reordered after every query to put the
	1233	indices that had the result(s) of that query first (while otherwise
	1234	preserving the relative ordering).
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1235	"""
	1236
def __init__(self, indices, reload_func=None):
    """Create a CombinedGraphIndex backed by indices.

    :param indices: An ordered list of indices to query for data. The list
        is stored as given, not copied.
    :param reload_func: A function to call if we find we are missing an
        index. Should have the form reload_func() => True/False to indicate
        if reloading actually changed anything.
    """
    self._reload_func = reload_func
    self._indices = indices
    # Sibling indices are other CombinedGraphIndex that we should call
    # _move_to_front_by_name on when we auto-reorder ourself.
    self._sibling_indices = []
    # A list of names that corresponds to the instances in self._indices,
    # so _index_names[0] is always the name for _indices[0], etc. Sibling
    # indices must all use the same set of names as each other. The
    # indices given here start out unnamed.
    self._index_names = [None] * len(indices)
2592.1.37 by Robert Collins Add CombinedGraphIndex.insert_index.	1254
def __repr__(self):
    """Return the class name followed by the reprs of the backing indices.

    The indices are listed in their current query order, separated by
    ', ' and wrapped in parentheses.
    """
    described = ', '.join([repr(index) for index in self._indices])
    return "%s(%s)" % (self.__class__.__name__, described)
	1259
def clear_cache(self):
    """Clear the cache of every backing index, in query order.

    See GraphIndex.clear_cache()
    """
    for child in self._indices:
        child.clear_cache()
	1264
def get_parent_map(self, keys):
    """See graph.StackedParentsProvider.get_parent_map

    :param keys: The keys to look up.
    :return: A dict mapping each key found by iter_entries to the first of
        its reference lists, or to (NULL_REVISION,) when that list is
        empty. NULL_REVISION itself, if requested, maps to an empty list.
        Keys that are not found are left out.
    """
    null_revision = _mod_revision.NULL_REVISION
    wanted = set(keys)
    parent_map = {}
    if null_revision in wanted:
        # The null revision is answered directly rather than searched for.
        wanted.discard(null_revision)
        parent_map[null_revision] = []
    for _index, key, _value, refs in self.iter_entries(wanted):
        parent_map[key] = refs[0] or (null_revision,)
    return parent_map
2979.2.2 by Robert Collins Per-file graph heads detection during commit for pack repositories.	1279
# Membership tests (``key in combined_index``) are served by the
# _has_key_from_parent_map helper, which is defined outside this excerpt;
# presumably it answers via get_parent_map -- confirm against its definition.
__contains__ = _has_key_from_parent_map
3830.3.9 by Martin Pool Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests	1281
def insert_index(self, pos, index, name=None):
    """Insert a new index in the list of indices to query.

    The name is inserted at the same position in the list of index names,
    so the two lists stay aligned.

    :param pos: The position to insert the index.
    :param index: The index to insert.
    :param name: a name for this index, e.g. a pack name. These names can
        be used to reflect index reorderings to related CombinedGraphIndex
        instances that use the same names. (see set_sibling_indices)
    """
    self._indices.insert(pos, index)
    self._index_names.insert(pos, name)
2592.1.37 by Robert Collins Add CombinedGraphIndex.insert_index.	1293
def iter_all_entries(self):
    """Iterate over all keys within the index

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    If a child index raises NoSuchFile, a reload is attempted and, when it
    succeeds, iteration restarts from the first index; keys that were
    already reported are not reported again.

    :return: An iterable of (index, key, value, reference_lists).
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    reported_keys = set()
    finished = False
    while not finished:
        try:
            for child in self._indices:
                for entry in child.iter_all_entries():
                    entry_key = entry[1]
                    if entry_key in reported_keys:
                        continue
                    yield entry
                    reported_keys.add(entry_key)
            finished = True
        except errors.NoSuchFile as e:
            if not self._try_reload(e):
                raise
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1316
	1317	def iter_entries(self, keys):
	1318	"""Iterate over keys within the index.
	1319
2592.1.44 by Robert Collins Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.	1320	Duplicate keys across child indices are presumed to have the same
	1321	value and are only reported once.
	1322
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1323	:param keys: An iterable providing the keys to be retrieved.
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1324	:return: An iterable of (index, key, reference_lists, value). There is
	1325	no defined order for the result iteration - it will be in the most
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1326	efficient order for the index.
	1327	"""
	1328	keys = set(keys)
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1329	hit_indices = []
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1330	while True:
	1331	try:
	1332	for index in self._indices:
	1333	if not keys:
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1334	break
	1335	index_hit = False
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1336	for node in index.iter_entries(keys):
	1337	keys.remove(node[1])
	1338	yield node
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1339	index_hit = True
	1340	if index_hit:
	1341	hit_indices.append(index)
	1342	break
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1343	except errors.NoSuchFile as e:
	1344	if not self._try_reload(e):
	1345	raise
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1346	self._move_to_front(hit_indices)
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1347
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    Every index is queried, and the indices that contributed at least one
    new key are passed to _move_to_front afterwards. If a child index
    raises NoSuchFile, a reload is attempted and, when it succeeds, the
    search restarts without repeating keys already reported.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = set(keys)
    if not prefixes:
        return
    reported_keys = set()
    hit_indices = []
    finished = False
    while not finished:
        try:
            for child in self._indices:
                found_any = False
                for entry in child.iter_entries_prefix(prefixes):
                    entry_key = entry[1]
                    if entry_key not in reported_keys:
                        reported_keys.add(entry_key)
                        yield entry
                        found_any = True
                if found_any:
                    hit_indices.append(child)
            finished = True
        except errors.NoSuchFile as e:
            if not self._try_reload(e):
                raise
    self._move_to_front(hit_indices)
	1390
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1391	def _move_to_front(self, hit_indices):
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1392	"""Rearrange self._indices so that hit_indices are first.
	1393
	1394	Order is maintained as much as possible, e.g. the first unhit index
	1395	will be the first index in _indices after the hit_indices, and the
	1396	hit_indices will be present in exactly the order they are passed to
	1397	_move_to_front.
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1398
	1399	_move_to_front propagates to all objects in self._sibling_indices by
	1400	calling _move_to_front_by_name.
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1401	"""
5151.2.1 by John Arbash Meinel Avoid reordering when unnecessary. Fixes bug #562429	1402	if self._indices[:len(hit_indices)] == hit_indices:
	1403	# The 'hit_indices' are already at the front (and in the same
	1404	# order), no need to re-order
	1405	return
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1406	hit_names = self._move_to_front_by_index(hit_indices)
	1407	for sibling_idx in self._sibling_indices:
	1408	sibling_idx._move_to_front_by_name(hit_names)
	1409
	1410	def _move_to_front_by_index(self, hit_indices):
	1411	"""Core logic for _move_to_front.
	1412
	1413	Returns a list of names corresponding to the hit_indices param.
	1414	"""
5151.2.3 by John Arbash Meinel Restore the indices_info variable.	1415	indices_info = zip(self._index_names, self._indices)
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1416	if 'index' in debug.debug_flags:
6631.2.1 by Martin Run 2to3 zip fixer and refactor	1417	indices_info = list(indices_info)
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	1418	trace.mutter('CombinedGraphIndex reordering: currently %r, '
	1419	'promoting %r', indices_info, hit_indices)
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1420	hit_names = []
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1421	unhit_names = []
	1422	new_hit_indices = []
	1423	unhit_indices = []
	1424
5151.2.3 by John Arbash Meinel Restore the indices_info variable.	1425	for offset, (name, idx) in enumerate(indices_info):
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1426	if idx in hit_indices:
5151.2.4 by John Arbash Meinel Minor tweak	1427	hit_names.append(name)
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1428	new_hit_indices.append(idx)
	1429	if len(new_hit_indices) == len(hit_indices):
	1430	# We've found all of the hit entries, everything else is
	1431	# unhit
	1432	unhit_names.extend(self._index_names[offset+1:])
	1433	unhit_indices.extend(self._indices[offset+1:])
	1434	break
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1435	else:
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1436	unhit_names.append(name)
	1437	unhit_indices.append(idx)
	1438
	1439	self._indices = new_hit_indices + unhit_indices
	1440	self._index_names = hit_names + unhit_names
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1441	if 'index' in debug.debug_flags:
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	1442	trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1443	return hit_names
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1444
	1445	def _move_to_front_by_name(self, hit_names):
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1446	"""Moves indices named by 'hit_names' to front of the search order, as
	1447	described in _move_to_front.
	1448	"""
	1449	# Translate names to index instances, and then call
	1450	# _move_to_front_by_index.
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1451	indices_info = zip(self._index_names, self._indices)
	1452	hit_indices = []
	1453	for name, idx in indices_info:
	1454	if name in hit_names:
	1455	hit_indices.append(idx)
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1456	self._move_to_front_by_index(hit_indices)
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	1457
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1458	def find_ancestry(self, keys, ref_list_num):
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1459	"""Find the complete ancestry for the given set of keys.
	1460
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1461	Note that this is a whole-ancestry request, so it should be used
	1462	sparingly.
	1463
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1464	:param keys: An iterable of keys to look for
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1465	:param ref_list_num: The reference list which references the parents
	1466	we care about.
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1467	:return: (parent_map, missing_keys)
	1468	"""
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1469	# XXX: make this call _move_to_front?
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1470	missing_keys = set()
	1471	parent_map = {}
	1472	keys_to_lookup = set(keys)
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1473	generation = 0
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1474	while keys_to_lookup:
	1475	# keys that all indexes claim are missing, stop searching them
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1476	generation += 1
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1477	all_index_missing = None
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1478	# print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
	1479	# print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
	1480	# len(parent_map),
	1481	# len(missing_keys))
	1482	for index_idx, index in enumerate(self._indices):
	1483	# TODO: we should probably be doing something with
	1484	# 'missing_keys' since we've already determined that
	1485	# those revisions have not been found anywhere
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1486	index_missing_keys = set()
	1487	# Find all of the ancestry we can from this index
	1488	# keep looking until the search_keys set is empty, which means
	1489	# things we didn't find should be in index_missing_keys
	1490	search_keys = keys_to_lookup
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1491	sub_generation = 0
	1492	# print ' \t%2d\t\t%4d\t%5d\t%5d' % (
	1493	# index_idx, len(search_keys),
	1494	# len(parent_map), len(index_missing_keys))
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1495	while search_keys:
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1496	sub_generation += 1
	1497	# TODO: ref_list_num should really be a parameter, since
	1498	# CombinedGraphIndex does not know what the ref lists
	1499	# mean.
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1500	search_keys = index._find_ancestors(search_keys,
	1501	ref_list_num, parent_map, index_missing_keys)
4593.4.9 by John Arbash Meinel Add some debugging statements for now.	1502	# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
	1503	# sub_generation, len(search_keys),
	1504	# len(parent_map), len(index_missing_keys))
4593.4.8 by John Arbash Meinel Implement CombinedGraphIndex.get_ancestry()	1505	# Now set whatever was missing to be searched in the next index
	1506	keys_to_lookup = index_missing_keys
	1507	if all_index_missing is None:
	1508	all_index_missing = set(index_missing_keys)
	1509	else:
	1510	all_index_missing.intersection_update(index_missing_keys)
	1511	if not keys_to_lookup:
	1512	break
	1513	if all_index_missing is None:
	1514	# There were no indexes, so all search keys are 'missing'
	1515	missing_keys.update(keys_to_lookup)
	1516	keys_to_lookup = None
	1517	else:
	1518	missing_keys.update(all_index_missing)
	1519	keys_to_lookup.difference_update(all_index_missing)
	1520	return parent_map, missing_keys
	1521
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1522	def key_count(self):
	1523	"""Return an estimate of the number of keys in this index.
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	1524
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1525	For CombinedGraphIndex this is approximated by the sum of the keys of
	1526	the child indices. As child indices may have duplicate keys this can
	1527	have a maximum error of the number of child indices * largest number of
	1528	keys in any index.
	1529	"""
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1530	while True:
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	1531	try:
	1532	return sum((index.key_count() for index in self._indices), 0)
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1533	except errors.NoSuchFile as e:
	1534	if not self._try_reload(e):
	1535	raise
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1536
    # Reuse _missing_keys_from_parent_map as this class's missing_keys
    # attribute (presumably a shared helper defined elsewhere in this
    # module - confirm against its definition).
    missing_keys = _missing_keys_from_parent_map
	1538
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1539	def _try_reload(self, error):
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1540	"""We just got a NoSuchFile exception.
	1541
	1542	Try to reload the indices, if it fails, just raise the current
	1543	exception.
	1544	"""
	1545	if self._reload_func is None:
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1546	return False
	1547	trace.mutter('Trying to reload after getting exception: %s', error)
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1548	if not self._reload_func():
	1549	# We tried to reload, but nothing changed, so we fail anyway
3789.1.10 by John Arbash Meinel Review comments from Martin.	1550	trace.mutter('_reload_func indicated nothing has changed.'
	1551	' Raising original exception.')
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1552	return False
	1553	return True
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1554
    def set_sibling_indices(self, sibling_combined_graph_indices):
        """Set the CombinedGraphIndex objects to reorder after reordering self.

        :param sibling_combined_graph_indices: The sibling objects; the value
            is stored as given (not copied) in self._sibling_indices,
            replacing any previous value.
        """
        self._sibling_indices = sibling_combined_graph_indices
	1559
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1560	def validate(self):
	1561	"""Validate that everything in the index can be accessed."""
3789.1.7 by John Arbash Meinel CombinedGraphIndex.validate() will now reload.	1562	while True:
	1563	try:
	1564	for index in self._indices:
	1565	index.validate()
	1566	return
6621.16.1 by Martin Make _reload_or_raise into _try_reload and have callers reraise	1567	except errors.NoSuchFile as e:
	1568	if not self._try_reload(e):
	1569	raise
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1570
	1571
	1572	class InMemoryGraphIndex(GraphIndexBuilder):
	1573	"""A GraphIndex which operates entirely out of memory and is mutable.
	1574
	1575	This is designed to allow the accumulation of GraphIndex entries during a
	1576	single write operation, where the accumulated entries need to be immediately
	1577	available - for example via a CombinedGraphIndex.
	1578	"""
	1579
	1580	def add_nodes(self, nodes):
	1581	"""Add nodes to the index.
	1582
	1583	:param nodes: An iterable of (key, node_refs, value) entries to add.
	1584	"""
2592.3.39 by Robert Collins Fugly version to remove signatures.kndx	1585	if self.reference_lists:
	1586	for (key, value, node_refs) in nodes:
	1587	self.add_node(key, value, node_refs)
	1588	else:
	1589	for (key, value) in nodes:
	1590	self.add_node(key, value)
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1591
	1592	def iter_all_entries(self):
	1593	"""Iterate over all keys within the index
	1594
2592.5.1 by Martin Pool Fix docstrings for Index.iter_entries etc	1595	:return: An iterable of (index, key, reference_lists, value). There is no
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1596	defined order for the result iteration - it will be in the most
	1597	efficient order for the index (in this case dictionary hash order).
	1598	"""
2745.1.1 by Robert Collins Add a number of -Devil checkpoints.	1599	if 'evil' in debug.debug_flags:
2592.3.112 by Robert Collins Various fixups found dogfooding.	1600	trace.mutter_callsite(3,
2745.1.2 by Robert Collins Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.	1601	"iter_all_entries scales with size of history.")
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1602	if self.reference_lists:
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	1603	for key, (absent, references, value) in viewitems(self._nodes):
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1604	if not absent:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1605	yield self, key, value, references
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1606	else:
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	1607	for key, (absent, references, value) in viewitems(self._nodes):
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1608	if not absent:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1609	yield self, key, value
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1610
	1611	def iter_entries(self, keys):
	1612	"""Iterate over keys within the index.
	1613
	1614	:param keys: An iterable providing the keys to be retrieved.
2979.2.4 by Robert Collins Docstring fixes from review.	1615	:return: An iterable of (index, key, value, reference_lists). There is no
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1616	defined order for the result iteration - it will be in the most
	1617	efficient order for the index (keys iteration order in this case).
	1618	"""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1619	# Note: See BTreeBuilder.iter_entries for an explanation of why we
	1620	# aren't using set().intersection() here
	1621	nodes = self._nodes
	1622	keys = [key for key in keys if key in nodes]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1623	if self.reference_lists:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1624	for key in keys:
	1625	node = nodes[key]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1626	if not node[0]:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1627	yield self, key, node[2], node[1]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1628	else:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1629	for key in keys:
	1630	node = nodes[key]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1631	if not node[0]:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1632	yield self, key, node[2]
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1633
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1634	def iter_entries_prefix(self, keys):
	1635	"""Iterate over keys within the index using prefix matching.
	1636
	1637	Prefix matching is applied within the tuple of a key, not to within
	1638	the bytestring of each key element. e.g. if you have the keys ('foo',
	1639	'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
	1640	only the former key is returned.
	1641
	1642	:param keys: An iterable providing the key prefixes to be retrieved.
	1643	Each key prefix takes the form of a tuple the length of a key, but
	1644	with the last N elements 'None' rather than a regular bytestring.
	1645	The first element cannot be 'None'.
	1646	:return: An iterable as per iter_all_entries, but restricted to the
	1647	keys with a matching prefix to those supplied. No additional keys
	1648	will be returned, and every match that is in the index will be
	1649	returned.
	1650	"""
	1651	keys = set(keys)
	1652	if not keys:
	1653	return
	1654	if self._key_length == 1:
	1655	for key in keys:
6654.1.1 by Martin Factor out some copycode in iter_entries_prefix implementations	1656	_sanity_check_key(self, key)
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1657	node = self._nodes[key]
	1658	if node[0]:
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1659	continue
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1660	if self.reference_lists:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1661	yield self, key, node[2], node[1]
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1662	else:
2624.2.17 by Robert Collins Review feedback.	1663	yield self, key, node[2]
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1664	return
3644.2.4 by John Arbash Meinel Change GraphIndex to also have a _get_nodes_by_key	1665	nodes_by_key = self._get_nodes_by_key()
6654.1.1 by Martin Factor out some copycode in iter_entries_prefix implementations	1666	for entry in _iter_entries_prefix(self, nodes_by_key, keys):
	1667	yield entry
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1668
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1669	def key_count(self):
	1670	"""Return an estimate of the number of keys in this index.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1671
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1672	For InMemoryGraphIndex the estimate is exact.
	1673	"""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1674	return len(self._nodes) - len(self._absent_keys)
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1675
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1676	def validate(self):
	1677	"""In memory index's have no known corruption at the moment."""
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1678
	1679
	1680	class GraphIndexPrefixAdapter(object):
	1681	"""An adapter between GraphIndex with different key lengths.
	1682
	1683	Queries against this will emit queries against the adapted Graph with the
	1684	prefix added, queries for all items use iter_entries_prefix. The returned
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1685	nodes will have their keys and node references adjusted to remove the
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1686	prefix. Finally, an add_nodes_callback can be supplied - when called the
	1687	nodes and references being added will have prefix prepended.
	1688	"""
	1689
2624.2.17 by Robert Collins Review feedback.	1690	def __init__(self, adapted, prefix, missing_key_length,
2624.2.17 by Robert Collins Review feedback.	1691	add_nodes_callback=None):
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1692	"""Construct an adapter against adapted with prefix."""
	1693	self.adapted = adapted
2624.2.19 by Robert Collins Why we should always test before committing.	1694	self.prefix_key = prefix + (None,)*missing_key_length
2624.2.17 by Robert Collins Review feedback.	1695	self.prefix = prefix
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1696	self.prefix_len = len(prefix)
	1697	self.add_nodes_callback = add_nodes_callback
	1698
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1699	def add_nodes(self, nodes):
	1700	"""Add nodes to the index.
	1701
	1702	:param nodes: An iterable of (key, node_refs, value) entries to add.
	1703	"""
	1704	# save nodes in case its an iterator
	1705	nodes = tuple(nodes)
	1706	translated_nodes = []
	1707	try:
2624.2.17 by Robert Collins Review feedback.	1708	# Add prefix_key to each reference node_refs is a tuple of tuples,
2624.2.17 by Robert Collins Review feedback.	1709	# so split it apart, and add prefix_key to the internal reference
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1710	for (key, value, node_refs) in nodes:
	1711	adjusted_references = (
2624.2.17 by Robert Collins Review feedback.	1712	tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1713	for ref_list in node_refs))
2624.2.17 by Robert Collins Review feedback.	1714	translated_nodes.append((self.prefix + key, value,
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1715	adjusted_references))
	1716	except ValueError:
	1717	# XXX: TODO add an explicit interface for getting the reference list
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1718	# status, to handle this bit of user-friendliness in the API more
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1719	# explicitly.
	1720	for (key, value) in nodes:
2624.2.17 by Robert Collins Review feedback.	1721	translated_nodes.append((self.prefix + key, value))
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1722	self.add_nodes_callback(translated_nodes)
	1723
	1724	def add_node(self, key, value, references=()):
	1725	"""Add a node to the index.
	1726
	1727	:param key: The key. keys are non-empty tuples containing
	1728	as many whitespace-free utf8 bytestrings as the key length
	1729	defined for this index.
	1730	:param references: An iterable of iterables of keys. Each is a
	1731	reference to another key.
	1732	:param value: The value to associate with the key. It may be any
	1733	bytes as long as it does not contain \0 or \n.
	1734	"""
	1735	self.add_nodes(((key, value, references), ))
	1736
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1737	def _strip_prefix(self, an_iter):
	1738	"""Strip prefix data from nodes and return it."""
	1739	for node in an_iter:
	1740	# cross checks
2624.2.17 by Robert Collins Review feedback.	1741	if node[1][:self.prefix_len] != self.prefix:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1742	raise errors.BadIndexData(self)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1743	for ref_list in node[3]:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1744	for ref_node in ref_list:
2624.2.17 by Robert Collins Review feedback.	1745	if ref_node[:self.prefix_len] != self.prefix:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1746	raise errors.BadIndexData(self)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1747	yield node[0], node[1][self.prefix_len:], node[2], (
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1748	tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1749	for ref_list in node[3]))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1750
	1751	def iter_all_entries(self):
	1752	"""Iterate over all keys within the index
	1753
	1754	iter_all_entries is implemented against the adapted index using
	1755	iter_entries_prefix.
	1756
2592.5.1 by Martin Pool Fix docstrings for Index.iter_entries etc	1757	:return: An iterable of (index, key, reference_lists, value). There is no
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1758	defined order for the result iteration - it will be in the most
	1759	efficient order for the index (in this case dictionary hash order).
	1760	"""
2624.2.19 by Robert Collins Why we should always test before committing.	1761	return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1762
	1763	def iter_entries(self, keys):
	1764	"""Iterate over keys within the index.
	1765
	1766	:param keys: An iterable providing the keys to be retrieved.
2979.2.4 by Robert Collins Docstring fixes from review.	1767	:return: An iterable of (index, key, value, reference_lists). There is no
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1768	defined order for the result iteration - it will be in the most
	1769	efficient order for the index (keys iteration order in this case).
	1770	"""
	1771	return self._strip_prefix(self.adapted.iter_entries(
2624.2.17 by Robert Collins Review feedback.	1772	self.prefix + key for key in keys))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1773
	1774	def iter_entries_prefix(self, keys):
	1775	"""Iterate over keys within the index using prefix matching.
	1776
	1777	Prefix matching is applied within the tuple of a key, not to within
	1778	the bytestring of each key element. e.g. if you have the keys ('foo',
	1779	'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
	1780	only the former key is returned.
	1781
	1782	:param keys: An iterable providing the key prefixes to be retrieved.
	1783	Each key prefix takes the form of a tuple the length of a key, but
	1784	with the last N elements 'None' rather than a regular bytestring.
	1785	The first element cannot be 'None'.
	1786	:return: An iterable as per iter_all_entries, but restricted to the
	1787	keys with a matching prefix to those supplied. No additional keys
	1788	will be returned, and every match that is in the index will be
	1789	returned.
	1790	"""
	1791	return self._strip_prefix(self.adapted.iter_entries_prefix(
2624.2.17 by Robert Collins Review feedback.	1792	self.prefix + key for key in keys))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1793
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1794	def key_count(self):
	1795	"""Return an estimate of the number of keys in this index.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1796
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1797	For GraphIndexPrefixAdapter this is relatively expensive - key
	1798	iteration with the prefix is done.
	1799	"""
	1800	return len(list(self.iter_all_entries()))
	1801
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1802	def validate(self):
	1803	"""Call the adapted's validate."""
	1804	self.adapted.validate()
6654.1.1 by Martin Factor out some copycode in iter_entries_prefix implementations	1805
	1806
	1807	def _sanity_check_key(index_or_builder, key):
	1808	"""Raise BadIndexKey if key cannot be used for prefix matching."""
	1809	if key[0] is None:
	1810	raise errors.BadIndexKey(key)
	1811	if len(key) != index_or_builder._key_length:
	1812	raise errors.BadIndexKey(key)
	1813
	1814
	1815	def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
	1816	"""Helper for implementing prefix matching iterators."""
	1817	for key in keys:
	1818	_sanity_check_key(index_or_builder, key)
	1819	# find what it refers to:
	1820	key_dict = nodes_by_key
	1821	elements = list(key)
	1822	# find the subdict whose contents should be returned.
	1823	try:
	1824	while len(elements) and elements[0] is not None:
	1825	key_dict = key_dict[elements[0]]
	1826	elements.pop(0)
	1827	except KeyError:
	1828	# a non-existant lookup.
	1829	continue
	1830	if len(elements):
	1831	dicts = [key_dict]
	1832	while dicts:
	1833	values_view = viewvalues(dicts.pop())
	1834	# can't be empty or would not exist
	1835	value = next(iter(values_view))
	1836	if isinstance(value, dict):
	1837	# still descending, push values
	1838	dicts.extend(values_view)
	1839	else:
	1840	# at leaf tuples, yield values
	1841	for value in values_view:
	1842	# each value is the key:value:node refs tuple
	1843	# ready to yield.
	1844	yield (index_or_builder, ) + value
	1845	else:
	1846	# the last thing looked up was a terminal element
	1847	yield (index_or_builder, ) + key_dict