/brz/remove-bazaar : contents of breezy/btree

: (revision 6630.1.4)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

5752.3.8 by John Arbash Meinel Merge bzr.dev 5764 to resolve release-notes (aka NEWS) conflicts	1	# Copyright (C) 2008-2011 Canonical Ltd
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	2	#
	3	# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel Cleanup the copyright headers	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	16	#
	17
6379.6.7 by Jelmer Vernooij Move importing from future until after doc string, otherwise the doc string will disappear.	18	"""B+Tree indices"""
	19
6379.6.1 by Jelmer Vernooij Import absolute_import in a few places.	20	from __future__ import absolute_import
	21
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	22	from .lazy_import import lazy_import
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	23	lazy_import(globals(), """
5753.2.4 by Jelmer Vernooij Review feedback from John.	24	import bisect
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	25	import math
	26	import tempfile
	27	import zlib
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	28	""")
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	29
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	30	from . import (
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	31	chunk_writer,
	32	debug,
	33	errors,
4208.1.2 by John Arbash Meinel Switch to using a FIFOCache.	34	fifo_cache,
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	35	index,
	36	lru_cache,
	37	osutils,
4789.28.1 by John Arbash Meinel Use StaticTuple as part of the builder process.	38	static_tuple,
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	39	trace,
5273.1.7 by Vincent Ladeuil No more use of the get_transport imported symbol, all uses are through	40	transport,
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	41	)
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	42	from .index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
	43	from .sixish import (
6621.22.2 by Martin Use BytesIO or StringIO from bzrlib.sixish	44	BytesIO,
6634.1.1 by Martin Remove direct use of future_builtins module	45	map,
6621.22.2 by Martin Use BytesIO or StringIO from bzrlib.sixish	46	)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	47
	48
# Signature line written at the very start of a serialised B+Tree index.
_BTSIGNATURE = "B+Tree Graph Index 2\n"
# Prefix of the header option that lists the node count of every row.
_OPTION_ROW_LENGTHS = "row_lengths="
# First line written into a leaf node and into an internal node respectively.
_LEAF_FLAG = "type=leaf\n"
_INTERNAL_FLAG = "type=internal\n"
# Prefix of the line written right after _INTERNAL_FLAG in an internal node.
_INTERNAL_OFFSET = "offset="

# Number of bytes set aside at the start of the first node of each row while
# building; the header has to fit inside this space.
_RESERVED_HEADER_BYTES = 120
# Size in bytes of one node (page).
_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries
_NODE_CACHE_SIZE = 1000
	60
	61
	62	class _BuilderRow(object):
	63	"""The stored state accumulated while writing out a row in the index.
	64
	65	:ivar spool: A temporary file used to accumulate nodes for this row
	66	in the tree.
	67	:ivar nodes: The count of nodes emitted so far.
	68	"""
	69
	70	def __init__(self):
	71	"""Create a _BuilderRow."""
	72	self.nodes = 0
4708.1.1 by John Arbash Meinel Use a cStringIO.StringIO for 1-page btree indexes.	73	self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	74	self.writer = None
	75
	76	def finish_node(self, pad=True):
	77	byte_lines, _, padding = self.writer.finish()
	78	if self.nodes == 0:
6621.22.2 by Martin Use BytesIO or StringIO from bzrlib.sixish	79	self.spool = BytesIO()
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	80	# padded note:
	81	self.spool.write("\x00" * _RESERVED_HEADER_BYTES)
4708.1.1 by John Arbash Meinel Use a cStringIO.StringIO for 1-page btree indexes.	82	elif self.nodes == 1:
	83	# We got bigger than 1 node, switch to a temp file
	84	spool = tempfile.TemporaryFile(prefix='bzr-index-row-')
	85	spool.write(self.spool.getvalue())
	86	self.spool = spool
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	87	skipped_bytes = 0
	88	if not pad and padding:
	89	del byte_lines[-1]
	90	skipped_bytes = padding
	91	self.spool.writelines(byte_lines)
3644.2.3 by John Arbash Meinel Do a bit more work to get all the tests to pass.	92	remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE
	93	if remainder != 0:
	94	raise AssertionError("incorrect node length: %d, %d"
	95	% (self.spool.tell(), remainder))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	96	self.nodes += 1
	97	self.writer = None
	98
	99
	100	class _InternalBuilderRow(_BuilderRow):
	101	"""The stored state accumulated while writing out internal rows."""
	102
	103	def finish_node(self, pad=True):
	104	if not pad:
	105	raise AssertionError("Must pad internal nodes only.")
	106	_BuilderRow.finish_node(self)
	107
	108
class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out the leaf row."""
	111
	112
	113	class BTreeBuilder(index.GraphIndexBuilder):
	114	"""A Builder for B+Tree based Graph indices.
	115
	116	The resulting graph has the structure:
	117
	118	_SIGNATURE OPTIONS NODES
	119	_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE
	120	OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS
	121	REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE
	122	KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE
	123	LENGTH := 'len=' DIGITS NEWLINE
	124	ROW_LENGTHS := 'row_lengths=' (DIGITS (COMMA DIGITS)*)? NEWLINE
	125	NODES := NODE_COMPRESSED*
	126	NODE_COMPRESSED:= COMPRESSED_BYTES{4096}
	127	NODE_RAW := INTERNAL | LEAF
	128	INTERNAL := INTERNAL_FLAG POINTERS
	129	LEAF := LEAF_FLAG ROWS
	130	KEY_ELEMENT := Not-whitespace-utf8
	131	KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*
	132	ROWS := ROW*
	133	ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
	134	ABSENT := 'a'
	135	REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
	136	REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
	137	REFERENCE := KEY
	138	VALUE := no-newline-no-null-bytes
	139	"""
	140
	141	def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
	142	"""See GraphIndexBuilder.__init__.
	143
	144	:param spill_at: Optional parameter controlling the maximum number
	145	of nodes that BTreeBuilder will hold in memory.
	146	"""
	147	index.GraphIndexBuilder.__init__(self, reference_lists=reference_lists,
	148	key_elements=key_elements)
	149	self._spill_at = spill_at
	150	self._backing_indices = []
3644.2.11 by John Arbash Meinel Document the new form of _nodes and remove an unnecessary cast.	151	# A map of {key: (node_refs, value)}
	152	self._nodes = {}
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	153	# Indicate it hasn't been built yet
	154	self._nodes_by_key = None
3777.5.2 by John Arbash Meinel Change the name to ChunkWriter.set_optimize()	155	self._optimize_for_size = False
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	156
	157	def add_node(self, key, value, references=()):
	158	"""Add a node to the index.
	159
	160	If adding the node causes the builder to reach its spill_at threshold,
	161	disk spilling will be triggered.
	162
	163	:param key: The key. keys are non-empty tuples containing
	164	as many whitespace-free utf8 bytestrings as the key length
	165	defined for this index.
	166	:param references: An iterable of iterables of keys. Each is a
	167	reference to another key.
	168	:param value: The value to associate with the key. It may be any
5891.1.3 by Andrew Bennetts Move docstring formatting fixes.	169	bytes as long as it does not contain \\0 or \\n.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	170	"""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	171	# Ensure that 'key' is a StaticTuple
	172	key = static_tuple.StaticTuple.from_sequence(key).intern()
3644.2.9 by John Arbash Meinel Refactor some code.	173	# we don't care about absent_references
3644.2.9 by John Arbash Meinel Refactor some code.	174	node_refs, _ = self._check_key_ref_value(key, references, value)
3644.2.2 by John Arbash Meinel the new btree index doesn't have 'absent' keys in its _nodes	175	if key in self._nodes:
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	176	raise errors.BadIndexDuplicateKey(key, self)
4789.28.1 by John Arbash Meinel Use StaticTuple as part of the builder process.	177	self._nodes[key] = static_tuple.StaticTuple(node_refs, value)
3644.2.9 by John Arbash Meinel Refactor some code.	178	if self._nodes_by_key is not None and self._key_length > 1:
3644.2.9 by John Arbash Meinel Refactor some code.	179	self._update_nodes_by_key(key, value, node_refs)
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	180	if len(self._nodes) < self._spill_at:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	181	return
3644.2.9 by John Arbash Meinel Refactor some code.	182	self._spill_mem_keys_to_disk()
	183
	184	def _spill_mem_keys_to_disk(self):
	185	"""Write the in memory keys down to disk to cap memory consumption.
	186
	187	If we already have some keys written to disk, we will combine them so
	188	as to preserve the sorted order. The algorithm for combining uses
	189	powers of two. So on the first spill, write all mem nodes into a
	190	single index. On the second spill, combine the mem nodes with the nodes
	191	on disk to create a 2x sized disk index and get rid of the first index.
	192	On the third spill, create a single new disk index, which will contain
	193	the mem nodes, and preserve the existing 2x sized index. On the fourth,
	194	combine mem with the first and second indexes, creating a new one of
	195	size 4x. On the fifth create a single new one, etc.
	196	"""
4168.3.6 by John Arbash Meinel Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().	197	if self._combine_backing_indices:
4168.3.5 by John Arbash Meinel Check that setting _combine_spilled_indices has the expected effect.	198	(new_backing_file, size,
	199	backing_pos) = self._spill_mem_keys_and_combine()
	200	else:
	201	new_backing_file, size = self._spill_mem_keys_without_combining()
	202	# Note: The transport here isn't strictly needed, because we will use
	203	# direct access to the new_backing._file object
6083.1.1 by Jelmer Vernooij Use get_transport_from_{url,path} in more places.	204	new_backing = BTreeGraphIndex(transport.get_transport_from_path('.'),
5273.1.7 by Vincent Ladeuil No more use of the get_transport imported symbol, all uses are through	205	'<temp>', size)
4168.3.5 by John Arbash Meinel Check that setting _combine_spilled_indices has the expected effect.	206	# GC will clean up the file
	207	new_backing._file = new_backing_file
4168.3.6 by John Arbash Meinel Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().	208	if self._combine_backing_indices:
4168.3.5 by John Arbash Meinel Check that setting _combine_spilled_indices has the expected effect.	209	if len(self._backing_indices) == backing_pos:
	210	self._backing_indices.append(None)
	211	self._backing_indices[backing_pos] = new_backing
	212	for backing_pos in range(backing_pos):
	213	self._backing_indices[backing_pos] = None
	214	else:
	215	self._backing_indices.append(new_backing)
	216	self._nodes = {}
	217	self._nodes_by_key = None
	218
	219	def _spill_mem_keys_without_combining(self):
	220	return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)
	221
	222	def _spill_mem_keys_and_combine(self):
4168.3.4 by John Arbash Meinel Restore the ability to spill, but prepare a flag to disable it.	223	iterators_to_combine = [self._iter_mem_nodes()]
	224	pos = -1
	225	for pos, backing in enumerate(self._backing_indices):
	226	if backing is None:
	227	pos -= 1
	228	break
	229	iterators_to_combine.append(backing.iter_all_entries())
	230	backing_pos = pos + 1
	231	new_backing_file, size = \
	232	self._write_nodes(self._iter_smallest(iterators_to_combine),
	233	allow_optimize=False)
4168.3.5 by John Arbash Meinel Check that setting _combine_spilled_indices has the expected effect.	234	return new_backing_file, size, backing_pos
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	235
	236	def add_nodes(self, nodes):
	237	"""Add nodes to the index.
	238
	239	:param nodes: An iterable of (key, node_refs, value) entries to add.
	240	"""
	241	if self.reference_lists:
	242	for (key, value, node_refs) in nodes:
	243	self.add_node(key, value, node_refs)
	244	else:
	245	for (key, value) in nodes:
	246	self.add_node(key, value)
	247
	248	def _iter_mem_nodes(self):
	249	"""Iterate over the nodes held in memory."""
3644.2.8 by John Arbash Meinel Two quick tweaks.	250	nodes = self._nodes
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	251	if self.reference_lists:
3644.2.8 by John Arbash Meinel Two quick tweaks.	252	for key in sorted(nodes):
	253	references, value = nodes[key]
	254	yield self, key, value, references
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	255	else:
3644.2.8 by John Arbash Meinel Two quick tweaks.	256	for key in sorted(nodes):
	257	references, value = nodes[key]
	258	yield self, key, value
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	259
	260	def _iter_smallest(self, iterators_to_combine):
3641.3.9 by John Arbash Meinel Special case around _iter_smallest when we have only	261	if len(iterators_to_combine) == 1:
	262	for value in iterators_to_combine[0]:
	263	yield value
	264	return
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	265	current_values = []
	266	for iterator in iterators_to_combine:
	267	try:
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	268	current_values.append(next(iterator))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	269	except StopIteration:
	270	current_values.append(None)
	271	last = None
	272	while True:
	273	# Decorate candidates with the value to allow 2.4's min to be used.
	274	candidates = [(item[1][1], item) for item
	275	in enumerate(current_values) if item[1] is not None]
	276	if not len(candidates):
	277	return
	278	selected = min(candidates)
	279	# undecorate back to (pos, node)
	280	selected = selected[1]
	281	if last == selected[1][1]:
	282	raise errors.BadIndexDuplicateKey(last, self)
	283	last = selected[1][1]
	284	# Yield, with self as the index
	285	yield (self,) + selected[1][1:]
	286	pos = selected[0]
	287	try:
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	288	current_values[pos] = next(iterators_to_combine[pos])
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	289	except StopIteration:
	290	current_values[pos] = None
	291
    def _add_key(self, string_key, line, rows, allow_optimize=True):
        """Add a key to the current chunk.

        Opens a new leaf node (and any missing internal nodes above it) when
        the leaf row has no open writer. If the line does not fit into the
        open leaf, that leaf is finished, the key is recorded in the rows
        above (adding a new root row if none of them can take it), and the
        method calls itself again to place the line in a fresh leaf.

        :param string_key: The key to add.
        :param line: The fully serialised key and value.
        :param rows: The list of builder rows; rows[-1] is the leaf row and
            a new root row is inserted at position 0 when one is needed.
        :param allow_optimize: If set to False, prevent setting the optimize
            flag when writing out. This is used by the _spill_mem_keys_to_disk
            functionality.
        :raises BadIndexKey: If the line does not even fit into a freshly
            opened leaf node.
        """
        new_leaf = False
        if rows[-1].writer is None:
            # opening a new leaf chunk;
            new_leaf = True
            for pos, internal_row in enumerate(rows[:-1]):
                # flesh out any internal nodes that are needed to
                # preserve the height of the tree
                if internal_row.writer is None:
                    length = _PAGE_SIZE
                    if internal_row.nodes == 0:
                        length -= _RESERVED_HEADER_BYTES # padded
                    if allow_optimize:
                        optimize_for_size = self._optimize_for_size
                    else:
                        optimize_for_size = False
                    internal_row.writer = chunk_writer.ChunkWriter(length, 0,
                        optimize_for_size=optimize_for_size)
                    internal_row.writer.write(_INTERNAL_FLAG)
                    # The offset line records how many nodes the row below
                    # has emitted so far.
                    internal_row.writer.write(_INTERNAL_OFFSET +
                        str(rows[pos + 1].nodes) + "\n")
            # add a new leaf
            length = _PAGE_SIZE
            if rows[-1].nodes == 0:
                length -= _RESERVED_HEADER_BYTES # padded
            # NOTE(review): unlike the internal rows above, the leaf writer
            # (and the new root writer further down) use
            # self._optimize_for_size directly and do not look at
            # allow_optimize - confirm whether that is intended.
            rows[-1].writer = chunk_writer.ChunkWriter(length,
                optimize_for_size=self._optimize_for_size)
            rows[-1].writer.write(_LEAF_FLAG)
        # A true result from write() is treated as "did not fit".
        if rows[-1].writer.write(line):
            # if we failed to write, despite having an empty page to write to,
            # then line is too big. raising the error avoids infinite recursion
            # searching for a suitably large page that will not be found.
            if new_leaf:
                raise errors.BadIndexKey(string_key)
            # this key did not fit in the node:
            rows[-1].finish_node()
            key_line = string_key + "\n"
            new_row = True
            for row in reversed(rows[:-1]):
                # Mark the start of the next node in the node above. If it
                # doesn't fit then propagate upwards until we find one that
                # it does fit into.
                if row.writer.write(key_line):
                    row.finish_node()
                else:
                    # We've found a node that can handle the pointer.
                    new_row = False
                    break
            # If we reached the current root without being able to mark the
            # division point, then we need a new root:
            if new_row:
                # We need a new row
                if 'index' in debug.debug_flags:
                    trace.mutter('Inserting new global row.')
                # From here on new_row holds the row object, not the flag.
                new_row = _InternalBuilderRow()
                reserved_bytes = 0
                rows.insert(0, new_row)
                # This will be padded, hence _RESERVED_HEADER_BYTES is
                # subtracted from the page size.
                new_row.writer = chunk_writer.ChunkWriter(
                    _PAGE_SIZE - _RESERVED_HEADER_BYTES,
                    reserved_bytes,
                    optimize_for_size=self._optimize_for_size)
                new_row.writer.write(_INTERNAL_FLAG)
                new_row.writer.write(_INTERNAL_OFFSET +
                    str(rows[1].nodes - 1) + "\n")
                new_row.writer.write(key_line)
            # Retry: the leaf row has no open writer now, so a new leaf is
            # opened for this line.
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
3641.3.8 by John Arbash Meinel Move the add_key helper function into a separate func	367
    def _write_nodes(self, node_iterator, allow_optimize=True):
        """Write node_iterator out as a B+Tree.

        :param node_iterator: An iterator of sorted nodes. Each node should
            match the output given by iter_all_entries.
        :param allow_optimize: If set to False, prevent setting the optimize
            flag when writing out. This is used by the _spill_mem_keys_to_disk
            functionality.
        :return: A tuple (result, size): a file-like object positioned at
            its start that contains the B+Tree for the nodes, and the number
            of bytes written to it. The object is a named temporary file when
            the bottom row has more than one node and an in-memory buffer
            otherwise.
        :raises AssertionError: If the header does not fit in the reserved
            space, or a non-leaf row copies an unexpected amount of data.
        """
        # The index rows - rows[0] is the root, rows[1] is the layer under it
        # etc.
        rows = []
        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        key_count = 0
        # A stack with the number of nodes of each size. 0 is the root node
        # and must always be 1 (if there are any nodes in the tree).
        # NOTE(review): this attribute is reset here but not filled in by this
        # method; the header below uses the local row_lengths instead.
        self.row_lengths = []
        # Loop over all nodes adding them to the bottom row
        # (rows[-1]). When we finish a chunk in a row,
        # propagate the key that didn't fit (comes after the chunk) to the
        # row above, transitively.
        for node in node_iterator:
            if key_count == 0:
                # First key triggers the first row
                rows.append(_LeafBuilderRow())
            key_count += 1
            string_key, line = _btree_serializer._flatten_node(node,
                                    self.reference_lists)
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
        # Close the node still open in every row; only non-leaf rows are
        # padded.
        for row in reversed(rows):
            pad = (not isinstance(row, _LeafBuilderRow))
            row.finish_node(pad=pad)
        lines = [_BTSIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        row_lengths = [row.nodes for row in rows]
        lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
        if row_lengths and row_lengths[-1] > 1:
            result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
        else:
            result = BytesIO()
        result.writelines(lines)
        position = sum(map(len, lines))
        # NOTE(review): root_row is assigned but not read again in this
        # method.
        root_row = True
        if position > _RESERVED_HEADER_BYTES:
            raise AssertionError("Could not fit the header in the"
                                 " reserved space: %d > %d"
                                 % (position, _RESERVED_HEADER_BYTES))
        # write the rows out:
        for row in rows:
            reserved = _RESERVED_HEADER_BYTES # reserved space for first node
            row.spool.flush()
            row.spool.seek(0)
            # copy nodes to the finalised file.
            # Special case the first node as it may be prefixed
            node = row.spool.read(_PAGE_SIZE)
            # Drop the reserved placeholder bytes at the start of the node.
            result.write(node[reserved:])
            if len(node) == _PAGE_SIZE:
                # Pad a full first node so that header (if any) plus node
                # add up to one page.
                result.write("\x00" * (reserved - position))
            position = 0 # Only the root row actually has an offset
            copied_len = osutils.pumpfile(row.spool, result)
            if copied_len != (row.nodes - 1) * _PAGE_SIZE:
                # Leaf rows may end with an unpadded node, so only other
                # rows are required to match exactly.
                if not isinstance(row, _LeafBuilderRow):
                    raise AssertionError("Incorrect amount of data copied"
                        " expected: %d, got: %d"
                        % ((row.nodes - 1) * _PAGE_SIZE,
                           copied_len))
        result.flush()
        size = result.tell()
        result.seek(0)
        return result, size
	444
	445	def finish(self):
	446	"""Finalise the index.
	447
	448	:return: A file handle for a temporary file containing the nodes added
	449	to the index.
	450	"""
	451	return self._write_nodes(self.iter_all_entries())[0]
	452
	453	def iter_all_entries(self):
	454	"""Iterate over all keys within the index
	455
4343.2.2 by John Arbash Meinel Fix an important doc bug about the api of iter_all_entries()	456	:return: An iterable of (index, key, value, reference_lists). There is
	457	no defined order for the result iteration - it will be in the most
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	458	efficient order for the index (in this case dictionary hash order).
	459	"""
	460	if 'evil' in debug.debug_flags:
	461	trace.mutter_callsite(3,
	462	"iter_all_entries scales with size of history.")
	463	# Doing serial rather than ordered would be faster; but this shouldn't
	464	# be getting called routinely anyway.
3644.2.8 by John Arbash Meinel Two quick tweaks.	465	iterators = [self._iter_mem_nodes()]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	466	for backing in self._backing_indices:
	467	if backing is not None:
	468	iterators.append(backing.iter_all_entries())
3641.3.9 by John Arbash Meinel Special case around _iter_smallest when we have only	469	if len(iterators) == 1:
	470	return iterators[0]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	471	return self._iter_smallest(iterators)
	472
	473	def iter_entries(self, keys):
	474	"""Iterate over keys within the index.
	475
	476	:param keys: An iterable providing the keys to be retrieved.
	477	:return: An iterable of (index, key, value, reference_lists). There is no
	478	defined order for the result iteration - it will be in the most
	479	efficient order for the index (keys iteration order in this case).
	480	"""
	481	keys = set(keys)
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	482	# Note: We don't use keys.intersection() here. If you read the C api,
	483	# set.intersection(other) special cases when other is a set and
	484	# will iterate the smaller of the two and lookup in the other.
	485	# It does not do this for any other type (even dict, unlike
	486	# some other set functions.) Since we expect keys is generally <<
	487	# self._nodes, it is faster to iterate over it in a list
	488	# comprehension
	489	nodes = self._nodes
	490	local_keys = [key for key in keys if key in nodes]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	491	if self.reference_lists:
3847.2.2 by John Arbash Meinel Rather than skipping the difference_update entirely, just restrict it to the intersection keys.	492	for key in local_keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	493	node = nodes[key]
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	494	yield self, key, node[1], node[0]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	495	else:
3847.2.2 by John Arbash Meinel Rather than skipping the difference_update entirely, just restrict it to the intersection keys.	496	for key in local_keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	497	node = nodes[key]
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	498	yield self, key, node[1]
3847.2.1 by John Arbash Meinel Shortcut BTreeBuilder.iter_entries when there are no backing indices.	499	# Find things that are in backing indices that have not been handled
	500	# yet.
3847.2.3 by John Arbash Meinel Bring back the shortcut	501	if not self._backing_indices:
3847.2.3 by John Arbash Meinel Bring back the shortcut	502	return # We won't find anything there either
3847.2.2 by John Arbash Meinel Rather than skipping the difference_update entirely, just restrict it to the intersection keys.	503	# Remove all of the keys that we found locally
	504	keys.difference_update(local_keys)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	505	for backing in self._backing_indices:
	506	if backing is None:
	507	continue
	508	if not keys:
	509	return
	510	for node in backing.iter_entries(keys):
	511	keys.remove(node[1])
	512	yield (self,) + node[1:]
	513
	514	def iter_entries_prefix(self, keys):
	515	"""Iterate over keys within the index using prefix matching.
	516
	517	Prefix matching is applied within the tuple of a key, not to within
	518	the bytestring of each key element. e.g. if you have the keys ('foo',
	519	'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
	520	only the former key is returned.
	521
	522	:param keys: An iterable providing the key prefixes to be retrieved.
	523	Each key prefix takes the form of a tuple the length of a key, but
	524	with the last N elements 'None' rather than a regular bytestring.
	525	The first element cannot be 'None'.
	526	:return: An iterable as per iter_all_entries, but restricted to the
	527	keys with a matching prefix to those supplied. No additional keys
	528	will be returned, and every match that is in the index will be
	529	returned.
	530	"""
	531	# XXX: To much duplication with the GraphIndex class; consider finding
	532	# a good place to pull out the actual common logic.
	533	keys = set(keys)
	534	if not keys:
	535	return
	536	for backing in self._backing_indices:
	537	if backing is None:
	538	continue
	539	for node in backing.iter_entries_prefix(keys):
	540	yield (self,) + node[1:]
	541	if self._key_length == 1:
	542	for key in keys:
	543	# sanity check
	544	if key[0] is None:
	545	raise errors.BadIndexKey(key)
	546	if len(key) != self._key_length:
	547	raise errors.BadIndexKey(key)
	548	try:
	549	node = self._nodes[key]
	550	except KeyError:
	551	continue
	552	if self.reference_lists:
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	553	yield self, key, node[1], node[0]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	554	else:
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	555	yield self, key, node[1]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	556	return
	557	for key in keys:
	558	# sanity check
	559	if key[0] is None:
	560	raise errors.BadIndexKey(key)
	561	if len(key) != self._key_length:
	562	raise errors.BadIndexKey(key)
	563	# find what it refers to:
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	564	key_dict = self._get_nodes_by_key()
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	565	elements = list(key)
	566	# find the subdict to return
	567	try:
	568	while len(elements) and elements[0] is not None:
	569	key_dict = key_dict[elements[0]]
	570	elements.pop(0)
	571	except KeyError:
	572	# a non-existant lookup.
	573	continue
	574	if len(elements):
	575	dicts = [key_dict]
	576	while dicts:
	577	key_dict = dicts.pop(-1)
	578	# can't be empty or would not exist
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	579	item, value = next(key_dict.iteritems())
6619.3.18 by Jelmer Vernooĳ Run 2to3 idioms fixer.	580	if isinstance(value, dict):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	581	# push keys
	582	dicts.extend(key_dict.itervalues())
	583	else:
	584	# yield keys
	585	for value in key_dict.itervalues():
5088.1.1 by Jelmer Vernooij Force value to a tuple before concatenating with tuple that contains variables	586	yield (self, ) + tuple(value)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	587	else:
	588	yield (self, ) + key_dict
	589
3644.2.1 by John Arbash Meinel Change the IndexBuilders to not generate the nodes_by_key unless needed.	590	def _get_nodes_by_key(self):
	591	if self._nodes_by_key is None:
	592	nodes_by_key = {}
	593	if self.reference_lists:
	594	for key, (references, value) in self._nodes.iteritems():
	595	key_dict = nodes_by_key
	596	for subkey in key[:-1]:
	597	key_dict = key_dict.setdefault(subkey, {})
	598	key_dict[key[-1]] = key, value, references
	599	else:
	600	for key, (references, value) in self._nodes.iteritems():
	601	key_dict = nodes_by_key
	602	for subkey in key[:-1]:
	603	key_dict = key_dict.setdefault(subkey, {})
	604	key_dict[key[-1]] = key, value
	605	self._nodes_by_key = nodes_by_key
	606	return self._nodes_by_key
	607
def key_count(self):
    """Return an estimate of the number of keys in this index.

    The figure is the number of in-memory nodes plus the key_count() of
    every backing index that is not None.
    """
    total = len(self._nodes)
    for backing in self._backing_indices:
        if backing is None:
            continue
        total += backing.key_count()
    return total
	615
	616	def validate(self):
	617	"""In memory index's have no known corruption at the moment."""
	618
	619
5365.5.12 by John Arbash Meinel Make _LeafNode inherit from dict (is-a rather than have-a)	620	class _LeafNode(dict):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	621	"""A leaf node for a serialised B+Tree index."""
	622
5365.5.23 by John Arbash Meinel A __sizeof__ check that ensure we are getting what we are looking for.	623	__slots__ = ('min_key', 'max_key', '_keys')
4274.1.2 by John Arbash Meinel Add slots to _LeafNode and _InternalNode.	624
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	625	def __init__(self, bytes, key_length, ref_list_length):
	626	"""Parse bytes to create a leaf node object."""
	627	# splitlines mangles the \r delimiters.. don't use it.
4593.4.2 by John Arbash Meinel Removing the min(keys) and max(keys) calls saves 100ms in the inner loop	628	key_list = _btree_serializer._parse_leaf_lines(bytes,
	629	key_length, ref_list_length)
	630	if key_list:
4593.4.4 by John Arbash Meinel Trying out a few more tweaks.	631	self.min_key = key_list[0][0]
	632	self.max_key = key_list[-1][0]
4593.4.2 by John Arbash Meinel Removing the min(keys) and max(keys) calls saves 100ms in the inner loop	633	else:
	634	self.min_key = self.max_key = None
5365.5.12 by John Arbash Meinel Make _LeafNode inherit from dict (is-a rather than have-a)	635	super(_LeafNode, self).__init__(key_list)
5365.5.23 by John Arbash Meinel A __sizeof__ check that ensure we are getting what we are looking for.	636	self._keys = dict(self)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	637
	638	def all_items(self):
	639	"""Return a sorted list of (key, (value, refs)) items"""
6619.3.18 by Jelmer Vernooĳ Run 2to3 idioms fixer.	640	items = sorted(self.items())
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	641	return items
	642
	643	def all_keys(self):
	644	"""Return a sorted list of all keys."""
6619.3.18 by Jelmer Vernooĳ Run 2to3 idioms fixer.	645	keys = sorted(self.keys())
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	646	return keys
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	647
	648
	649	class _InternalNode(object):
	650	"""An internal node for a serialised B+Tree index."""
	651
4274.1.2 by John Arbash Meinel Add slots to _LeafNode and _InternalNode.	652	__slots__ = ('keys', 'offset')
	653
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	654	def __init__(self, bytes):
	655	"""Parse bytes to create an internal node object."""
	656	# splitlines mangles the \r delimiters.. don't use it.
	657	self.keys = self._parse_lines(bytes.split('\n'))
	658
	659	def _parse_lines(self, lines):
	660	nodes = []
	661	self.offset = int(lines[1][7:])
4789.28.1 by John Arbash Meinel Use StaticTuple as part of the builder process.	662	as_st = static_tuple.StaticTuple.from_sequence
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	663	for line in lines[2:]:
	664	if line == '':
	665	break
6631.3.1 by Martin Run 2to3 map fixer and refactor after	666	# GZ 2017-05-24: Used to intern() each chunk of line as well, need
	667	# to recheck performance and perhaps adapt StaticTuple to adjust.
	668	nodes.append(as_st(line.split(b'\0')).intern())
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	669	return nodes
	670
	671
	672	class BTreeGraphIndex(object):
	673	"""Access to nodes via the standard GraphIndex interface for B+Tree's.
	674
	675	Individual nodes are held in a LRU cache. This holds the root node in
	676	memory except when very large walks are done.
	677	"""
	678
def __init__(self, transport, name, size, unlimited_cache=False,
             offset=0):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    :param unlimited_cache: If set to True, then instead of using an
        LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always
        cache all leaf nodes.
    :param offset: The start of the btree index data isn't byte 0 of the
        file. Instead it starts at some point later.
    """
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    # Needs self._transport, so it must come after the assignment above.
    self._recommended_pages = self._compute_recommended_pages()
    self._root_node = None
    self._base_offset = offset
    self._leaf_factory = _LeafNode
    # The leaf value cache is currently disabled; when enabled the default
    # max size was 100,000 leaf values: lru_cache.LRUCache(100*1000)
    self._leaf_value_cache = None
    if not unlimited_cache:
        self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
        # We use a FIFO here just to prevent possible blowout. However, a
        # 300k record btree has only 3k leaf nodes, and only 20 internal
        # nodes. A value of 100 scales to ~100*100*100 = 1M records.
        self._internal_node_cache = fifo_cache.FIFOCache(100)
    else:
        # Plain dicts never evict, so every node read stays cached.
        self._leaf_node_cache = {}
        self._internal_node_cache = {}
    self._key_count = None
    self._row_lengths = None
    # Start of each row, [-1] is the end
    self._row_offsets = None
	718
	719	def __eq__(self, other):
	720	"""Equal when self and other were created with the same parameters."""
	721	return (
6619.3.18 by Jelmer Vernooĳ Run 2to3 idioms fixer.	722	isinstance(self, type(other)) and
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	723	self._transport == other._transport and
	724	self._name == other._name and
	725	self._size == other._size)
	726
	727	def __ne__(self, other):
	728	return not self.__eq__(other)
	729
3763.8.12 by John Arbash Meinel Code cleanup.	730	def _get_and_cache_nodes(self, nodes):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	731	"""Read nodes and cache them in the lru.
	732
	733	The nodes list supplied is sorted and then read from disk, each node
	734	being inserted it into the _node_cache.
	735
	736	Note: Asking for more nodes than the _node_cache can contain will
	737	result in some of the results being immediately discarded, to prevent
	738	this an assertion is raised if more nodes are asked for than are
	739	cachable.
	740
	741	:return: A dict of {node_pos: node}
	742	"""
	743	found = {}
3763.8.1 by John Arbash Meinel Playing around with expanding requests for btree index nodes into neighboring nodes.	744	start_of_leaves = None
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	745	for node_pos, node in self._read_nodes(sorted(nodes)):
	746	if node_pos == 0: # Special case
	747	self._root_node = node
	748	else:
3763.8.1 by John Arbash Meinel Playing around with expanding requests for btree index nodes into neighboring nodes.	749	if start_of_leaves is None:
	750	start_of_leaves = self._row_offsets[-2]
	751	if node_pos < start_of_leaves:
4634.71.2 by John Arbash Meinel If we are going to sometimes use a dict, we have to conform to just the dict interface.	752	self._internal_node_cache[node_pos] = node
3763.8.1 by John Arbash Meinel Playing around with expanding requests for btree index nodes into neighboring nodes.	753	else:
4634.71.2 by John Arbash Meinel If we are going to sometimes use a dict, we have to conform to just the dict interface.	754	self._leaf_node_cache[node_pos] = node
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	755	found[node_pos] = node
	756	return found
	757
def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes, we want to know how many _PAGE_SIZE
    pages fit in that length, rounding up.

    :return: The page count as an int.
    """
    read_bytes = self._transport.recommended_page_size()
    # Float division so the ceiling is right even for Python 2 integers.
    pages = math.ceil(read_bytes / float(_PAGE_SIZE))
    return int(pages)
	768
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	769	def _compute_total_pages_in_index(self):
	770	"""How many pages are in the index.
	771
	772	If we have read the header we will use the value stored there.
	773	Otherwise it will be computed based on the length of the index.
	774	"""
3763.8.7 by John Arbash Meinel A bit of doc updates, start putting in tests for current behavior.	775	if self._size is None:
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	776	raise AssertionError('_compute_total_pages_in_index should not be'
	777	' called when self._size is None')
3763.8.7 by John Arbash Meinel A bit of doc updates, start putting in tests for current behavior.	778	if self._root_node is not None:
	779	# This is the number of pages as defined by the header
	780	return self._row_offsets[-1]
	781	# This is the number of pages as defined by the size of the index. They
	782	# should be indentical.
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	783	total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
	784	return total_pages
3763.8.7 by John Arbash Meinel A bit of doc updates, start putting in tests for current behavior.	785
def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download. When no expansion is done the
        ``offsets`` argument itself is returned unchanged.
    """
    if 'index' in debug.debug_flags:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    # If reading recommended_pages would read the rest of the index, just
    # do so.
    if total_pages - len(cached_offsets) <= self._recommended_pages:
        # Read whatever is left. range() plus a comprehension yields a
        # real list on both Python 2 and Python 3 (xrange is Python 2
        # only), and with nothing cached it is simply every page.
        expanded = [x for x in range(total_pages)
                    if x not in cached_offsets]
        if 'index' in debug.debug_flags:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelihood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion
            # If we are only going to read the root node, and 1 leaf node,
            # then it isn't worth expanding our request. Once we've read at
            # least 2 nodes, then we are probably doing a search, and we
            # start expanding our requests.
            if 'index' in debug.debug_flags:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)

    final_offsets = sorted(final_offsets)
    if 'index' in debug.debug_flags:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets
	854
	855	def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):
	856	"""Expand requests to neighbors until we have enough pages.
	857
	858	This is called from _expand_offsets after policy has determined that we
	859	want to expand.
	860	We only want to expand requests within a given layer. We cheat a little
	861	bit and assume all requests will be in the same layer. This is true
	862	given the current design, but if it changes this algorithm may perform
	863	oddly.
	864
	865	:param offsets: requested offsets
	866	:param cached_offsets: offsets for pages we currently have cached
	867	:return: A set() of offsets after expansion
	868	"""
	869	final_offsets = set(offsets)
	870	first = end = None
	871	new_tips = set(final_offsets)
	872	while len(final_offsets) < self._recommended_pages and new_tips:
	873	next_tips = set()
	874	for pos in new_tips:
	875	if first is None:
	876	first, end = self._find_layer_first_and_end(pos)
	877	previous = pos - 1
	878	if (previous > 0
	879	and previous not in cached_offsets
	880	and previous not in final_offsets
	881	and previous >= first):
	882	next_tips.add(previous)
	883	after = pos + 1
	884	if (after < total_pages
	885	and after not in cached_offsets
	886	and after not in final_offsets
	887	and after < end):
	888	next_tips.add(after)
	889	# This would keep us from going bigger than
	890	# recommended_pages by only expanding the first offsets.
	891	# However, if we are making a 'wide' request, it is
	892	# reasonable to expand all points equally.
	893	# if len(final_offsets) > recommended_pages:
	894	# break
	895	final_offsets.update(next_tips)
	896	new_tips = next_tips
	897	return final_offsets
3763.8.1 by John Arbash Meinel Playing around with expanding requests for btree index nodes into neighboring nodes.	898
def clear_cache(self):
    """Clear out any cached/memoized values.

    This can be called at any time, but generally it is used when we have
    extracted some information, but don't expect to be requesting any more
    from this index.

    Only the leaf node cache is emptied.
    """
    # Note that we don't touch self._root_node or self._internal_node_cache
    # We don't expect either of those to be big, and it can save
    # round-trips in the future. We may re-evaluate this if InternalNode
    # memory starts to be an issue.
    leaf_cache = self._leaf_node_cache
    leaf_cache.clear()
	911
def external_references(self, ref_list_num):
    """Return the keys referenced by, but not present in, this index.

    :param ref_list_num: Which reference list of each entry to inspect
        (0-based).
    :return: A set of the keys found in reference list ``ref_list_num`` of
        any entry that are not themselves keys of an entry in the index.
    :raises ValueError: if the index has no reference list with that
        number.
    """
    if self._root_node is None:
        # Reading the root node is what makes node_ref_lists available
        # -- presumably via the header parse; TODO confirm.
        self._get_root_node()
    if ref_list_num + 1 > self.node_ref_lists:
        raise ValueError('No ref list %d, index has %d ref lists'
                         % (ref_list_num, self.node_ref_lists))
    present = set()
    referenced = set()
    for entry in self.iter_all_entries():
        present.add(entry[1])
        referenced.update(entry[3][ref_list_num])
    return referenced.difference(present)
	924
3763.8.12 by John Arbash Meinel Code cleanup.	925	def _find_layer_first_and_end(self, offset):
	926	"""Find the start/stop nodes for the layer corresponding to offset.
	927
	928	:return: (first, end)
	929	first is the first node in this layer
	930	end is the first node of the next layer
	931	"""
	932	first = end = 0
	933	for roffset in self._row_offsets:
	934	first = end
	935	end = roffset
	936	if offset < roffset:
	937	break
	938	return first, end
	939
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	940	def _get_offsets_to_cached_pages(self):
3763.8.12 by John Arbash Meinel Code cleanup.	941	"""Determine what nodes we already have cached."""
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	942	cached_offsets = set(self._internal_node_cache.keys())
	943	cached_offsets.update(self._leaf_node_cache.keys())
3763.8.12 by John Arbash Meinel Code cleanup.	944	if self._root_node is not None:
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	945	cached_offsets.add(0)
	946	return cached_offsets
3763.8.12 by John Arbash Meinel Code cleanup.	947
	948	def _get_root_node(self):
	949	if self._root_node is None:
	950	# We may not have a root node yet
	951	self._get_internal_nodes([0])
	952	return self._root_node
	953
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	954	def _get_nodes(self, cache, node_indexes):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	955	found = {}
	956	needed = []
	957	for idx in node_indexes:
	958	if idx == 0 and self._root_node is not None:
	959	found[0] = self._root_node
	960	continue
	961	try:
	962	found[idx] = cache[idx]
	963	except KeyError:
	964	needed.append(idx)
3763.8.1 by John Arbash Meinel Playing around with expanding requests for btree index nodes into neighboring nodes.	965	if not needed:
	966	return found
3763.8.15 by John Arbash Meinel Review comments from Martin. Code clarity/variable name/docstring updates.	967	needed = self._expand_offsets(needed)
3763.8.12 by John Arbash Meinel Code cleanup.	968	found.update(self._get_and_cache_nodes(needed))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	969	return found
	970
	971	def _get_internal_nodes(self, node_indexes):
	972	"""Get a node, from cache or disk.
	973
	974	After getting it, the node will be cached.
	975	"""
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	976	return self._get_nodes(self._internal_node_cache, node_indexes)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	977
3805.4.6 by John Arbash Meinel refactor for clarity.	978	def _cache_leaf_values(self, nodes):
3805.4.6 by John Arbash Meinel refactor for clarity.	979	"""Cache directly from key => value, skipping the btree."""
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	980	if self._leaf_value_cache is not None:
3805.4.6 by John Arbash Meinel refactor for clarity.	981	for node in nodes.itervalues():
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	982	for key, value in node.all_items():
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	983	if key in self._leaf_value_cache:
	984	# Don't add the rest of the keys, we've seen this node
	985	# before.
	986	break
	987	self._leaf_value_cache[key] = value
3805.4.6 by John Arbash Meinel refactor for clarity.	988
	989	def _get_leaf_nodes(self, node_indexes):
	990	"""Get a bunch of nodes, from cache or disk."""
	991	found = self._get_nodes(self._leaf_node_cache, node_indexes)
	992	self._cache_leaf_values(found)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	993	return found
	994
	995	def iter_all_entries(self):
	996	"""Iterate over all keys within the index.
	997
	998	:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
	999	The former tuple is used when there are no reference lists in the
	1000	index, making the API compatible with simple key:value index types.
	1001	There is no defined order for the result iteration - it will be in
	1002	the most efficient order for the index.
	1003	"""
	1004	if 'evil' in debug.debug_flags:
	1005	trace.mutter_callsite(3,
	1006	"iter_all_entries scales with size of history.")
	1007	if not self.key_count():
	1008	return
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1009	if self._row_offsets[-1] == 1:
	1010	# There is only the root node, and we read that via key_count()
	1011	if self.node_ref_lists:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1012	for key, (value, refs) in self._root_node.all_items():
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1013	yield (self, key, value, refs)
	1014	else:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1015	for key, (value, refs) in self._root_node.all_items():
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1016	yield (self, key, value)
	1017	return
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1018	start_of_leaves = self._row_offsets[-2]
	1019	end_of_leaves = self._row_offsets[-1]
3824.1.2 by John Arbash Meinel iter_all_entries() shouldn't need to re-read the page.	1020	needed_offsets = range(start_of_leaves, end_of_leaves)
	1021	if needed_offsets == [0]:
	1022	# Special case when we only have a root node, as we have already
	1023	# read everything
	1024	nodes = [(0, self._root_node)]
	1025	else:
	1026	nodes = self._read_nodes(needed_offsets)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1027	# We iterate strictly in-order so that we can use this function
	1028	# for spilling index builds to disk.
	1029	if self.node_ref_lists:
3824.1.2 by John Arbash Meinel iter_all_entries() shouldn't need to re-read the page.	1030	for _, node in nodes:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1031	for key, (value, refs) in node.all_items():
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1032	yield (self, key, value, refs)
	1033	else:
3824.1.2 by John Arbash Meinel iter_all_entries() shouldn't need to re-read the page.	1034	for _, node in nodes:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1035	for key, (value, refs) in node.all_items():
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1036	yield (self, key, value)
	1037
	1038	@staticmethod
	1039	def _multi_bisect_right(in_keys, fixed_keys):
	1040	"""Find the positions where each 'in_key' would fit in fixed_keys.
	1041
	1042	This is equivalent to doing "bisect_right" on each in_key into
	1043	fixed_keys
	1044
	1045	:param in_keys: A sorted list of keys to match with fixed_keys
	1046	:param fixed_keys: A sorted list of keys to match against
	1047	:return: A list of (integer position, [key list]) tuples.
	1048	"""
	1049	if not in_keys:
	1050	return []
	1051	if not fixed_keys:
	1052	# no pointers in the fixed_keys list, which means everything must
	1053	# fall to the left.
	1054	return [(0, in_keys)]
	1055
	1056	# TODO: Iterating both lists will generally take M + N steps
	1057	# Bisecting each key will generally take M * log2 N steps.
	1058	# If we had an efficient way to compare, we could pick the method
	1059	# based on which has the fewer number of steps.
	1060	# There is also the argument that bisect_right is a compiled
	1061	# function, so there is even more to be gained.
	1062	# iter_steps = len(in_keys) + len(fixed_keys)
	1063	# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
	1064	if len(in_keys) == 1: # Bisect will always be faster for M = 1
5753.2.4 by Jelmer Vernooij Review feedback from John.	1065	return [(bisect.bisect_right(fixed_keys, in_keys[0]), in_keys)]
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1066	# elif bisect_steps < iter_steps:
	1067	# offsets = {}
	1068	# for key in in_keys:
	1069	# offsets.setdefault(bisect_right(fixed_keys, key),
	1070	# []).append(key)
	1071	# return [(o, offsets[o]) for o in sorted(offsets)]
	1072	in_keys_iter = iter(in_keys)
	1073	fixed_keys_iter = enumerate(fixed_keys)
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	1074	cur_in_key = next(in_keys_iter)
	1075	cur_fixed_offset, cur_fixed_key = next(fixed_keys_iter)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1076
	1077	class InputDone(Exception): pass
	1078	class FixedDone(Exception): pass
	1079
	1080	output = []
	1081	cur_out = []
	1082
	1083	# TODO: Another possibility is that rather than iterating on each side,
	1084	# we could use a combination of bisecting and iterating. For
	1085	# example, while cur_in_key < fixed_key, bisect to find its
	1086	# point, then iterate all matching keys, then bisect (restricted
	1087	# to only the remainder) for the next one, etc.
	1088	try:
	1089	while True:
	1090	if cur_in_key < cur_fixed_key:
	1091	cur_keys = []
	1092	cur_out = (cur_fixed_offset, cur_keys)
	1093	output.append(cur_out)
	1094	while cur_in_key < cur_fixed_key:
	1095	cur_keys.append(cur_in_key)
	1096	try:
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	1097	cur_in_key = next(in_keys_iter)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1098	except StopIteration:
	1099	raise InputDone
	1100	# At this point cur_in_key must be >= cur_fixed_key
	1101	# step the cur_fixed_key until we pass the cur key, or walk off
	1102	# the end
	1103	while cur_in_key >= cur_fixed_key:
	1104	try:
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	1105	cur_fixed_offset, cur_fixed_key = next(fixed_keys_iter)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1106	except StopIteration:
	1107	raise FixedDone
	1108	except InputDone:
	1109	# We consumed all of the input, nothing more to do
	1110	pass
	1111	except FixedDone:
	1112	# There was some input left, but we consumed all of fixed, so we
	1113	# have to add one more for the tail
	1114	cur_keys = [cur_in_key]
	1115	cur_keys.extend(in_keys_iter)
	1116	cur_out = (len(fixed_keys), cur_keys)
	1117	output.append(cur_out)
	1118	return output
	1119
4593.4.5 by John Arbash Meinel Start adding some tests.	1120	def _walk_through_internal_nodes(self, keys):
	1121	"""Take the given set of keys, and find the corresponding LeafNodes.
	1122
	1123	:param keys: An unsorted iterable of keys to search for
	1124	:return: (nodes, index_and_keys)
	1125	nodes is a dict mapping {index: LeafNode}
	1126	keys_at_index is a list of tuples of [(index, [keys for Leaf])]
	1127	"""
	1128	# 6 seconds spent in miss_torture using the sorted() line.
	1129	# Even with out of order disk IO it seems faster not to sort it when
	1130	# large queries are being made.
	1131	keys_at_index = [(0, sorted(keys))]
	1132
	1133	for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):
	1134	node_indexes = [idx for idx, s_keys in keys_at_index]
	1135	nodes = self._get_internal_nodes(node_indexes)
	1136
	1137	next_nodes_and_keys = []
	1138	for node_index, sub_keys in keys_at_index:
	1139	node = nodes[node_index]
	1140	positions = self._multi_bisect_right(sub_keys, node.keys)
	1141	node_offset = next_row_start + node.offset
	1142	next_nodes_and_keys.extend([(node_offset + pos, s_keys)
	1143	for pos, s_keys in positions])
	1144	keys_at_index = next_nodes_and_keys
	1145	# We should now be at the _LeafNodes
	1146	node_indexes = [idx for idx, s_keys in keys_at_index]
	1147
	1148	# TODO: We may not want to always read all the nodes in one
	1149	# big go. Consider setting a max size on this.
	1150	nodes = self._get_leaf_nodes(node_indexes)
	1151	return nodes, keys_at_index
	1152
def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    # Sorting the keys used to be a measurable cost (6 seconds in
    # miss_torture), but the multi-way bisect in
    # _walk_through_internal_nodes needs sorted input anyway. That code
    # could be changed to cope with unsorted keys (bisect for the first
    # key, search in order until a key sorts before the current point,
    # bisect for that one, etc.)
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            cached = self._leaf_value_cache.get(key, None)
            if cached is None:
                needed_keys.append(key)
                continue
            # Answered straight from the cache; this key must not be
            # looked up (and yielded) again from the leaf pages below.
            value, refs = cached
            if self.node_ref_lists:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

    if not needed_keys:
        # Everything was served from the cache.
        return
    nodes, nodes_and_keys = self._walk_through_internal_nodes(needed_keys)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node:
                value, refs = node[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)
	1208
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1209	def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
4593.4.11 by John Arbash Meinel Snapshot the work in progress.	1210	"""Find the parent_map information for the set of keys.
	1211
	1212	This populates the parent_map dict and missing_keys set based on the
	1213	queried keys. It also can fill out an arbitrary number of parents that
	1214	it finds while searching for the supplied keys.
	1215
	1216	It is unlikely that you want to call this directly. See
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	1217	"CombinedGraphIndex.find_ancestry()" for a more appropriate API.
4593.4.11 by John Arbash Meinel Snapshot the work in progress.	1218
	1219	:param keys: A keys whose ancestry we want to return
	1220	Every key will either end up in 'parent_map' or 'missing_keys'.
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1221	:param ref_list_num: This index in the ref_lists is the parents we
	1222	care about.
4593.4.11 by John Arbash Meinel Snapshot the work in progress.	1223	:param parent_map: {key: parent_keys} for keys that are present in this
	1224	index. This may contain more entries than were in 'keys', that are
	1225	reachable ancestors of the keys requested.
4593.4.5 by John Arbash Meinel Start adding some tests.	1226	:param missing_keys: keys which are known to be missing in this index.
4593.4.11 by John Arbash Meinel Snapshot the work in progress.	1227	This may include parents that were not directly requested, but we
	1228	were able to determine that they are not present in this index.
	1229	:return: search_keys parents that were found but not queried to know
	1230	if they are missing or present. Callers can re-query this index for
	1231	those keys, and they will be placed into parent_map or missing_keys
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1232	"""
	1233	if not self.key_count():
	1234	# We use key_count() to trigger reading the root node and
	1235	# determining info about this BTreeGraphIndex
	1236	# If we don't have any keys, then everything is missing
4593.4.11 by John Arbash Meinel Snapshot the work in progress.	1237	missing_keys.update(keys)
	1238	return set()
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1239	if ref_list_num >= self.node_ref_lists:
	1240	raise ValueError('No ref list %d, index has %d ref lists'
	1241	% (ref_list_num, self.node_ref_lists))
	1242
	1243	# The main trick we are trying to accomplish is that when we find a
	1244	# key listing its parents, we expect that the parent key is also likely
	1245	# to sit on the same page. Allowing us to expand parents quickly
	1246	# without suffering the full stack of bisecting, etc.
4593.4.5 by John Arbash Meinel Start adding some tests.	1247	nodes, nodes_and_keys = self._walk_through_internal_nodes(keys)
4593.4.5 by John Arbash Meinel Start adding some tests.	1248
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1249	# These are parent keys which could not be immediately resolved on the
	1250	# page where the child was present. Note that we may already be
	1251	# searching for that key, and it may actually be present [or known
	1252	# missing] on one of the other pages we are reading.
	1253	# TODO:
	1254	# We could try searching for them in the immediate previous or next
	1255	# page. If they occur "later" we could put them in a pending lookup
	1256	# set, and then for each node we read thereafter we could check to
	1257	# see if they are present.
	1258	# However, we don't know the impact of keeping this list of things
	1259	# that I'm going to search for every node I come across from here on
	1260	# out.
	1261	# It doesn't handle the case when the parent key is missing on a
	1262	# page that we don't read. So we already have to handle being
	1263	# re-entrant for that.
	1264	# Since most keys contain a date string, they are more likely to be
	1265	# found earlier in the file than later, but we would know that right
	1266	# away (key < min_key), and wouldn't keep searching it on every other
	1267	# page that we read.
	1268	# Mostly, it is an idea, one which should be benchmarked.
	1269	parents_not_on_page = set()
	1270
	1271	for node_index, sub_keys in nodes_and_keys:
	1272	if not sub_keys:
	1273	continue
	1274	# sub_keys is all of the keys we are looking for that should exist
	1275	# on this page, if they aren't here, then they won't be found
	1276	node = nodes[node_index]
	1277	parents_to_check = set()
	1278	for next_sub_key in sub_keys:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1279	if next_sub_key not in node:
4593.4.5 by John Arbash Meinel Start adding some tests.	1280	# This one is just not present in the index at all
	1281	missing_keys.add(next_sub_key)
	1282	else:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1283	value, refs = node[next_sub_key]
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1284	parent_keys = refs[ref_list_num]
	1285	parent_map[next_sub_key] = parent_keys
	1286	parents_to_check.update(parent_keys)
	1287	# Don't look for things we've already found
	1288	parents_to_check = parents_to_check.difference(parent_map)
4593.4.4 by John Arbash Meinel Trying out a few more tweaks.	1289	# this can be used to test the benefit of having the check loop
	1290	# inlined.
	1291	# parents_not_on_page.update(parents_to_check)
	1292	# continue
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1293	while parents_to_check:
	1294	next_parents_to_check = set()
	1295	for key in parents_to_check:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	1296	if key in node:
	1297	value, refs = node[key]
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1298	parent_keys = refs[ref_list_num]
	1299	parent_map[key] = parent_keys
	1300	next_parents_to_check.update(parent_keys)
	1301	else:
4593.4.4 by John Arbash Meinel Trying out a few more tweaks.	1302	# This parent either is genuinely missing, or should be
	1303	# found on another page. Perf test whether it is better
	1304	# to check if this node should fit on this page or not.
	1305	# in the 'everything-in-one-pack' scenario, this not
	1306	# doing the check is 237ms vs 243ms.
	1307	# So slightly better, but I assume the standard 'lots
	1308	# of packs' is going to show a reasonable improvement
	1309	# from the check, because it avoids 'going around
	1310	# again' for everything that is in another index
4593.4.5 by John Arbash Meinel Start adding some tests.	1311	# parents_not_on_page.add(key)
	1312	# Missing for some reason
	1313	if key < node.min_key:
	1314	# in the case of bzr.dev, 3.4k/5.3k misses are
	1315	# 'earlier' misses (65%)
	1316	parents_not_on_page.add(key)
	1317	elif key > node.max_key:
	1318	# This parent key would be present on a different
	1319	# LeafNode
	1320	parents_not_on_page.add(key)
	1321	else:
	1322	# assert key != node.min_key and key != node.max_key
	1323	# If it was going to be present, it would be on
	1324	# this page, so mark it missing.
	1325	missing_keys.add(key)
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1326	parents_to_check = next_parents_to_check.difference(parent_map)
4593.4.4 by John Arbash Meinel Trying out a few more tweaks.	1327	# Might want to do another .difference() from missing_keys
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1328	# parents_not_on_page could have been found on a different page, or be
	1329	# known to be missing. So cull out everything that has already been
	1330	# found.
4593.4.5 by John Arbash Meinel Start adding some tests.	1331	search_keys = parents_not_on_page.difference(
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1332	parent_map).difference(missing_keys)
4593.4.5 by John Arbash Meinel Start adding some tests.	1333	return search_keys
4593.4.1 by John Arbash Meinel Implement a function on btree that inlines the get_parent_map loop.	1334
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: if a prefix starts with None or does not
        have the same number of elements as the keys of this index.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current breezy) just suck the entire index and iterate in memory.
    #
    # Each entry from iter_all_entries is (index, key, value[, refs]); the
    # part after the index is stored so it can be yielded again as-is.
    if self._key_length == 1:
        nodes = {}
        for entry in self.iter_all_entries():
            nodes[entry[1]] = entry[1:]
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                found = nodes[key]
            except KeyError:
                continue
            yield (self, ) + found
        return
    # For a key of (foo, bar, baz) create
    # nodes_by_key[foo][bar][baz] = (key, value[, refs])
    nodes_by_key = {}
    for entry in self.iter_all_entries():
        key = entry[1]
        key_dict = nodes_by_key
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = entry[1:]
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # Follow the leading non-None elements down the nested dicts.
        key_dict = nodes_by_key
        depth = 0
        try:
            while depth < len(key) and key[depth] is not None:
                key_dict = key_dict[key[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(key):
            # Every element was given, so the lookup ended on the stored
            # entry itself rather than on a dict.
            yield (self, ) + key_dict
            continue
        # A real prefix: yield everything stored below this sub-dict.
        dicts = [key_dict]
        while dicts:
            key_dict = dicts.pop(-1)
            # Can't be empty or it would not exist. items()/values() are
            # used because iteritems()/itervalues() only exist on
            # Python 2.
            item, value = next(iter(key_dict.items()))
            if isinstance(value, dict):
                # one more level of nesting to descend into
                dicts.extend(key_dict.values())
            else:
                # each value is the stored entry, ready to yield
                for value in key_dict.values():
                    yield (self, ) + value
	1446
	1447	def key_count(self):
	1448	"""Return an estimate of the number of keys in this index.
	1449
	1450	For BTreeGraphIndex the estimate is exact as it is contained in the
	1451	header.
	1452	"""
	1453	if self._key_count is None:
	1454	self._get_root_node()
	1455	return self._key_count
	1456
3763.8.7 by John Arbash Meinel A bit of doc updates, start putting in tests for current behavior.	1457	def _compute_row_offsets(self):
	1458	"""Fill out the _row_offsets attribute based on _row_lengths."""
	1459	offsets = []
	1460	row_offset = 0
	1461	for row in self._row_lengths:
	1462	offsets.append(row_offset)
	1463	row_offset += row
	1464	offsets.append(row_offset)
	1465	self._row_offsets = offsets
	1466
def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Checks the signature, then reads the four option lines that follow it
    and stores them as node_ref_lists, _key_length, _key_count and
    _row_lengths (from which _row_offsets is computed).

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises errors.BadIndexFormatSignature: if the data does not start
        with the signature of this index type.
    :raises errors.BadIndexOptions: if an option line has the wrong prefix
        or a value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_payload(line, prefix):
        # Return what follows the expected option prefix on this line.
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return line[len(prefix):]

    def int_option(line, prefix):
        # Return the integer value of a single-number option line.
        payload = option_payload(line, prefix)
        try:
            return int(payload)
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(lines[0], _OPTION_NODE_REFS)
    self._key_length = int_option(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(lines[2], _OPTION_LEN)
    row_payload = option_payload(lines[3], _OPTION_ROW_LENGTHS)
    try:
        # Comma separated list; empty fields are ignored.
        self._row_lengths = [int(length)
                             for length in row_payload.split(',')
                             if length]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # Bytes processed: the signature, the four option lines and the line
    # terminator that ends each of them.
    header_end = (len(signature) + sum(map(len, lines[0:4])) + 4)
    return header_end, bytes[header_end:]
	1513
	1514	def _read_nodes(self, nodes):
	1515	"""Read some nodes from disk into the LRU cache.
	1516
	1517	This performs a readv to get the node data into memory, and parses each
3868.1.1 by Martin Pool merge John's patch to avoid re-reading pack-names file	1518	node, then yields it to the caller. The nodes are requested in the
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1519	supplied order. If possible doing sort() on the list before requesting
	1520	a read may improve performance.
	1521
	1522	:param nodes: The nodes to read. 0 - first node, 1 - second node etc.
	1523	:return: None
	1524	"""
3868.1.1 by Martin Pool merge John's patch to avoid re-reading pack-names file	1525	# may be the byte string of the whole file
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1526	bytes = None
3868.1.1 by Martin Pool merge John's patch to avoid re-reading pack-names file	1527	# list of (offset, length) regions of the file that should, evenually
	1528	# be read in to data_ranges, either from 'bytes' or from the transport
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1529	ranges = []
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1530	base_offset = self._base_offset
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1531	for index in nodes:
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1532	offset = (index * _PAGE_SIZE)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1533	size = _PAGE_SIZE
	1534	if index == 0:
	1535	# Root node - special case
	1536	if self._size:
	1537	size = min(_PAGE_SIZE, self._size)
	1538	else:
3824.1.1 by John Arbash Meinel Fix _read_nodes() to only issue a single read if there is no known size.	1539	# The only case where we don't know the size, is for very
	1540	# small indexes. So we read the whole thing
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1541	bytes = self._transport.get_bytes(self._name)
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1542	num_bytes = len(bytes)
	1543	self._size = num_bytes - base_offset
3868.1.1 by Martin Pool merge John's patch to avoid re-reading pack-names file	1544	# the whole thing should be parsed out of 'bytes'
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1545	ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
	1546	for start in xrange(base_offset, num_bytes, _PAGE_SIZE)]
3823.5.2 by John Arbash Meinel It turns out that we read the pack-names file 3-times because	1547	break
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1548	else:
3763.8.6 by John Arbash Meinel Fix the logic a bit, and add a bit more tweaking opportunities	1549	if offset > self._size:
	1550	raise AssertionError('tried to read past the end'
	1551	' of the file %s > %s'
	1552	% (offset, self._size))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1553	size = min(size, self._size - offset)
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1554	ranges.append((base_offset + offset, size))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1555	if not ranges:
	1556	return
3868.1.1 by Martin Pool merge John's patch to avoid re-reading pack-names file	1557	elif bytes is not None:
	1558	# already have the whole file
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1559	data_ranges = [(start, bytes[start:start+size])
	1560	for start, size in ranges]
3824.1.1 by John Arbash Meinel Fix _read_nodes() to only issue a single read if there is no known size.	1561	elif self._file is None:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1562	data_ranges = self._transport.readv(self._name, ranges)
	1563	else:
	1564	data_ranges = []
	1565	for offset, size in ranges:
	1566	self._file.seek(offset)
	1567	data_ranges.append((offset, self._file.read(size)))
	1568	for offset, data in data_ranges:
5074.4.1 by John Arbash Meinel Add an offset flag to BTreeGraphIndex.	1569	offset -= base_offset
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1570	if offset == 0:
	1571	# extract the header
	1572	offset, data = self._parse_header_from_bytes(data)
	1573	if len(data) == 0:
	1574	continue
	1575	bytes = zlib.decompress(data)
	1576	if bytes.startswith(_LEAF_FLAG):
5365.5.18 by John Arbash Meinel Expose the new leaf node factory across the stack.	1577	node = self._leaf_factory(bytes, self._key_length,
	1578	self.node_ref_lists)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	1579	elif bytes.startswith(_INTERNAL_FLAG):
	1580	node = _InternalNode(bytes)
	1581	else:
	1582	raise AssertionError("Unknown node type for %r" % bytes)
	1583	yield offset / _PAGE_SIZE, node
	1584
	1585	def _signature(self):
	1586	"""The file signature for this index type."""
	1587	return _BTSIGNATURE
	1588
	1589	def validate(self):
	1590	"""Validate that everything in the index can be accessed."""
	1591	# just read and parse every node.
	1592	self._get_root_node()
	1593	if len(self._row_lengths) > 1:
	1594	start_node = self._row_offsets[1]
	1595	else:
	1596	# We shouldn't be reading anything anyway
	1597	start_node = 1
	1598	node_end = self._row_offsets[-1]
	1599	for node in self._read_nodes(range(start_node, node_end)):
	1600	pass
	1601
	1602
# Leaf factory used unless the compiled serializer below can be imported.
_gcchk_factory = _LeafNode

try:
    from breezy import _btree_serializer_pyx as _btree_serializer
    # The extension module loaded: take its _parse_into_chk as the factory.
    _gcchk_factory = _btree_serializer._parse_into_chk
except ImportError as e:
    # Report the failed extension load, then bind _btree_serializer to the
    # _btree_serializer_py module instead. _gcchk_factory stays _LeafNode.
    osutils.failed_to_load_extension(e)
    from breezy import _btree_serializer_py as _btree_serializer