/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1
# Copyright (C) 2008, 2009 Canonical Ltd
2
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
16
17
"""Core compression logic for compressing streams of related files."""
18
0.17.13 by Robert Collins
Do not output copy instructions which take more to encode than a fresh insert. (But do not refer to those insertions when finding ranges to copy: they are not interesting).
19
from itertools import izip
0.17.5 by Robert Collins
nograph tests completely passing.
20
from cStringIO import StringIO
0.25.3 by John Arbash Meinel
Add a encode/decode base128 functions.
21
import struct
0.17.5 by Robert Collins
nograph tests completely passing.
22
import zlib
0.17.44 by John Arbash Meinel
Use the bit field to allow both lzma groups and zlib groups.
23
try:
24
    import pylzma
25
except ImportError:
26
    pylzma = None
0.17.5 by Robert Collins
nograph tests completely passing.
27
0.17.4 by Robert Collins
Annotate.
28
from bzrlib import (
29
    annotate,
0.17.5 by Robert Collins
nograph tests completely passing.
30
    debug,
0.17.4 by Robert Collins
Annotate.
31
    diff,
0.17.5 by Robert Collins
nograph tests completely passing.
32
    errors,
0.17.4 by Robert Collins
Annotate.
33
    graph as _mod_graph,
0.20.2 by John Arbash Meinel
Teach groupcompress about 'chunked' encoding
34
    osutils,
0.17.4 by Robert Collins
Annotate.
35
    pack,
36
    patiencediff,
37
    )
38
from bzrlib.graph import Graph
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
39
from bzrlib.knit import _DirectPackAccess
0.17.2 by Robert Collins
Core proof of concept working.
40
from bzrlib.osutils import (
41
    contains_whitespace,
42
    sha_string,
43
    split_lines,
44
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
45
from bzrlib.btree_index import BTreeBuilder
0.17.24 by Robert Collins
Add a group cache to decompression, 5 times faster than knit at decompression when accessing everything in a group.
46
from bzrlib.lru_cache import LRUSizeCache
0.17.9 by Robert Collins
Initial stab at repository format support.
47
from bzrlib.tsort import topo_sort
0.17.2 by Robert Collins
Core proof of concept working.
48
from bzrlib.versionedfile import (
0.17.5 by Robert Collins
nograph tests completely passing.
49
    adapter_registry,
50
    AbsentContentFactory,
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
51
    ChunkedContentFactory,
0.17.2 by Robert Collins
Core proof of concept working.
52
    FulltextContentFactory,
53
    VersionedFiles,
54
    )
55
0.17.44 by John Arbash Meinel
Use the bit field to allow both lzma groups and zlib groups.
56
# Whether to compress group blocks with lzma instead of zlib. The leading
# `False and` hard-wires this off even when pylzma imported successfully.
_USE_LZMA = False and (pylzma is not None)
# When True, blocks are written and read without the per-entry label header
# (see GroupCompressBlock._parse_header_bytes / to_bytes / extract).
_NO_LABELS = False
# When True, trade safety for speed: compress() trusts a supplied
# expected_sha instead of re-hashing, and skips feeding delta bytes back
# into the delta index.
_FAST = False
0.17.2 by Robert Collins
Core proof of concept working.
59
0.25.3 by John Arbash Meinel
Add a encode/decode base128 functions.
60
def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding.

    Each output byte carries 7 data bits, least-significant group first.
    The high bit is set on every byte except the last, which terminates
    the sequence.

    :param val: A non-negative integer to encode.
    :return: A string containing the encoded bytes.
    """
    # Fixed: dropped the unused local `count` and stopped shadowing the
    # builtin name `bytes`.
    data = []
    while val >= 0x80:
        # Emit the low 7 bits with the continuation bit (0x80) set.
        data.append(chr((val | 0x80) & 0xFF))
        val >>= 7
    data.append(chr(val))
    return ''.join(data)
69
70
71
def decode_base128_int(bytes):
    """Decode an integer from a 7-bit lsb encoding.

    Continuation bytes have the high bit (0x80) set; the first byte
    without it terminates the number.

    :param bytes: A string whose leading bytes hold an encoded integer.
    :return: A tuple of (value, number_of_bytes_consumed).
    """
    value = 0
    shift = 0
    pos = 0
    while True:
        byte_val = ord(bytes[pos])
        pos += 1
        value |= (byte_val & 0x7F) << shift
        if byte_val < 0x80:
            # Terminator byte: we are done.
            return value, pos
        shift += 7
85
86
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
87
def sort_gc_optimal(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # groupcompress ordering is approximately reverse topological, properly
    # grouped by file-id: bucket every (key, parents) item by its prefix
    # first, then topo-sort each bucket independently.
    buckets = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            bucket_key = ''
        else:
            bucket_key = key[0]
        buckets.setdefault(bucket_key, []).append(item)

    ordered_keys = []
    for bucket_key in sorted(buckets):
        ordered_keys.extend(reversed(topo_sort(buckets[bucket_key])))
    return ordered_keys
113
114
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
115
class GroupCompressBlockEntry(object):
    """Track the information about a single object inside a GC group.

    This is generally just the dumb data structure.
    """

    def __init__(self, key, type, sha1, start, length):
        # The key this content is stored under
        self.key = key
        # delta, fulltext, external?
        self.type = type
        # Sha1 of content
        self.sha1 = sha1
        # Byte offset to start of data
        self.start = start
        # Length of content
        self.length = length

    def __repr__(self):
        fields = (self.key, self.type, self.sha1, self.start, self.length)
        return '%s(%s, %s, %s, %s, %s)' % (
            (self.__class__.__name__,) + fields)
133
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
134
135
class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 LZMA
    GCB_LZ_HEADER = 'gcb1l\n'

    def __init__(self):
        # map by key? or just order in file?
        self._entries = {}
        # Decompressed content bytes, once known.
        self._content = None
        self._size = 0

    def _parse_header(self):
        """Parse the meta-info from the stream."""

    def __len__(self):
        return self._size

    def _parse_header_bytes(self, header_bytes):
        """Parse the header part of the block."""
        if _NO_LABELS:
            # Don't parse the label structure if we aren't going to use it
            return
        info_dict = {}
        for line in header_bytes.split('\n'):
            if not line:
                # Blank line terminates one record (or the whole header if
                # no fields were collected).
                if not info_dict:
                    break
                self.add_entry(**info_dict)
                info_dict = {}
                continue
            key, value = line.split(':', 1)
            if key == 'key':
                value = tuple(map(intern, value.split('\x00')))
            elif key in ('start', 'length'):
                value = int(value)
            elif key == 'type':
                value = intern(value)
            info_dict[key] = value

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block by deserializing its byte representation.

        :param bytes: The serialized form produced by to_bytes().
        :return: A new GroupCompressBlock instance.
        """
        out = cls()
        if bytes[:6] not in (cls.GCB_HEADER, cls.GCB_LZ_HEADER):
            raise ValueError('bytes did not start with %r' % (cls.GCB_HEADER,))
        # Byte 4 of the magic names the compressor used for this block.
        if bytes[4] == 'z':
            decomp = zlib.decompress
        elif bytes[4] == 'l':
            decomp = pylzma.decompress
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        # Two newline-terminated decimal fields follow the magic: the
        # compressed header length and the uncompressed header length.
        eol = bytes.index('\n', 6)
        z_header_length = int(bytes[6:eol])
        start = eol + 1
        eol = bytes.index('\n', start)
        header_length = int(bytes[start:eol])
        if z_header_length == 0:
            # No label section: everything after the length lines is the
            # (compressed) content.
            if header_length != 0:
                raise ValueError('z_header_length 0, but header length != 0')
            zcontent = bytes[eol+1:]
            if zcontent:
                out._content = decomp(zcontent)
                out._size = len(out._content)
            return out
        start = eol + 1
        stop = start + z_header_length
        z_header_bytes = bytes[start:stop]
        if len(z_header_bytes) != z_header_length:
            raise ValueError('Wrong length of compressed header. %s != %s'
                             % (len(z_header_bytes), z_header_length))
        header_bytes = decomp(z_header_bytes)
        if len(header_bytes) != header_length:
            raise ValueError('Wrong length of header. %s != %s'
                             % (len(header_bytes), header_length))
        del z_header_bytes
        out._parse_header_bytes(header_bytes)
        del header_bytes
        zcontent = bytes[stop:]
        if zcontent:
            out._content = decomp(zcontent)
            out._size = header_length + len(out._content)
        return out

    def extract(self, key, index_memo, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        """
        if _NO_LABELS or not self._entries:
            start, end = index_memo[3:5]
            # The bytes are 'f' or 'd' for the type, then a variable-length
            # base128 integer for the content size, then the actual content.
            # We know that the variable-length integer won't be longer than
            # 10 bytes (it only takes 5 bytes to encode 2^32)
            c = self._content[start]
            if c == 'f':
                type = 'fulltext'
            else:
                if c != 'd':
                    raise ValueError('Unknown content control code: %s'
                                     % (c,))
                type = 'delta'
            entry = GroupCompressBlockEntry(key, type, sha1=None,
                                            start=start, length=end-start)
        else:
            entry = self._entries[key]
            # Cross-check the label's claimed type against the control byte
            # stored in the content stream.
            c = self._content[entry.start]
            if entry.type == 'fulltext':
                if c != 'f':
                    raise ValueError('Label claimed fulltext, byte claims: %s'
                                     % (c,))
            elif entry.type == 'delta':
                if c != 'd':
                    raise ValueError('Label claimed delta, byte claims: %s'
                                     % (c,))
            start = entry.start
        content_len, len_len = decode_base128_int(
                            self._content[entry.start + 1:entry.start + 11])
        content_start = entry.start + 1 + len_len
        end = entry.start + entry.length
        content = self._content[content_start:end]
        if c == 'f':
            bytes = content
        elif c == 'd':
            # Delta records expand against the whole block content.
            bytes = _groupcompress_pyx.apply_delta(self._content, content)
        if entry.sha1 is None:
            entry.sha1 = sha_string(bytes)
        return entry, bytes

    def add_entry(self, key, type, sha1, start, length):
        """Add new meta info about an entry.

        :param key: The key for the new content
        :param type: Whether this is a delta or fulltext entry (external?)
        :param sha1: sha1sum of the fulltext of this entry
        :param start: where the encoded bytes start
        :param length: total number of bytes in the encoded form
        :return: The entry?
        """
        entry = GroupCompressBlockEntry(key, type, sha1, start, length)
        if key in self._entries:
            raise ValueError('Duplicate key found: %s' % (key,))
        self._entries[key] = entry
        return entry

    def to_bytes(self, content=''):
        """Encode the information into a byte stream."""
        if _USE_LZMA:
            compress = pylzma.compress
        else:
            compress = zlib.compress
        chunks = []
        for key in sorted(self._entries):
            entry = self._entries[key]
            chunk = ('key:%s\n'
                     'sha1:%s\n'
                     'type:%s\n'
                     'start:%s\n'
                     'length:%s\n'
                     '\n'
                     ) % ('\x00'.join(entry.key),
                          entry.sha1,
                          entry.type,
                          entry.start,
                          entry.length,
                          )
            chunks.append(chunk)
        bytes = ''.join(chunks)
        info_len = len(bytes)
        z_bytes = []
        z_bytes.append(compress(bytes))
        del bytes
        # TODO: we may want to have the header compressed in the same chain
        #       as the data, or we may not, evaulate it
        #       having them compressed together is probably a win for
        #       revisions and the 'inv' portion of chk inventories. As the
        #       label in the header is duplicated in the text.
        #       For chk pages and real bytes, I would guess this is not
        #       true.
        z_len = sum(map(len, z_bytes))
        c_len = len(content)
        if _NO_LABELS:
            # Drop the label section entirely; only lengths of 0 are
            # written for it.
            z_bytes = []
            z_len = 0
            info_len = 0
        z_bytes.append(compress(content))
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n' % (z_len,),
                  '%d\n' % (info_len,),
                  #'%d\n' % (c_len,),
                 ]
        chunks.extend(z_bytes)
        return ''.join(chunks)
338
339
0.17.2 by Robert Collins
Core proof of concept working.
340
class GroupCompressor(object):
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
341
    """Produce a serialised group of compressed texts.
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
342
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
343
    It contains code very similar to SequenceMatcher because of having a similar
344
    task. However some key differences apply:
345
     - there is no junk, we want a minimal edit not a human readable diff.
346
     - we don't filter very common lines (because we don't know where a good
347
       range will start, and after the first text we want to be emitting minmal
348
       edits only.
349
     - we chain the left side, not the right side
350
     - we incrementally update the adjacency matrix as new lines are provided.
351
     - we look for matches in all of the left side, so the routine which does
352
       the analagous task of find_longest_match does not need to filter on the
353
       left side.
354
    """
0.17.2 by Robert Collins
Core proof of concept working.
355
356
    def __init__(self, delta=True):
        """Create a GroupCompressor.

        :param delta: If False, do not compress records.
        """
        # Consider seeding the lines with some sort of GC Start flag, or
        # putting it as part of the output stream, rather than in the
        # compressed bytes.
        self.num_keys = 0        # texts inserted so far
        self.input_bytes = 0     # total uncompressed bytes fed in
        self.endpoint = 0        # current byte offset in the output
        self.lines = []          # accumulated output chunks
        # key -> ((start_offset, start_chunk), (end_offset, end_chunk))
        self.labels_deltas = {}
        self._last = None
        self._delta_index = _groupcompress_pyx.DeltaIndex()
        self._block = GroupCompressBlock()
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
372
3735.31.12 by John Arbash Meinel
Push nostore_sha down through the stack.
373
    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output
            for identification of the text during decompression. If the last
            element is 'None' it is replaced with the sha1 of the text -
            e.g. sha1:xxxxxxx.
        :param bytes: The bytes to be compressed
        :param expected_sha: If non-None, the sha the lines are believed to
            have. During compression the sha is calculated; a mismatch will
            cause an error.
        :param nostore_sha: If the computed sha1 sum matches, we will raise
            ExistingContent rather than adding the text.
        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.
        :return: The sha1 of lines, and the number of bytes accumulated in
            the group output so far.
        :seealso VersionedFiles.add_lines:
        """
        if not _FAST or expected_sha is None:
            sha1 = sha_string(bytes)
        else:
            # _FAST mode trusts the caller-supplied sha rather than hashing.
            sha1 = expected_sha
        if sha1 == nostore_sha:
            raise errors.ExistingContent()
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        # A delta longer than half the fulltext is not worth storing.
        # (// keeps the result an int; same value as / on Python 2 ints.)
        max_delta_size = len(bytes) // 2
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No usable delta: store a fulltext record,
            # 'f' + base128 length prefix + the raw bytes.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            len_mini_header = 1 + len(enc_length)
            length = len(bytes) + len_mini_header
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            # Store a delta record, 'd' + base128 length prefix + the delta.
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            length = len(delta) + len_mini_header
            new_chunks = ['d', enc_length, delta]
            if _FAST:
                # Skip indexing the delta bytes; just keep offsets aligned.
                self._delta_index._source_offset += length
            else:
                self._delta_index.add_delta_source(delta, len_mini_header)
        self._block.add_entry(key, type=type, sha1=sha1,
                              start=self.endpoint, length=length)
        delta_start = (self.endpoint, len(self.lines))
        self.num_keys += 1
        self.output_chunks(new_chunks)
        self.input_bytes += input_len
        delta_end = (self.endpoint, len(self.lines))
        self.labels_deltas[key] = (delta_start, delta_end)
        # Fixed: use != instead of `not ... ==`, and add the missing space
        # between the concatenated message strings ("syncwith" previously).
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return sha1, self.endpoint, type, length
0.17.2 by Robert Collins
Core proof of concept working.
446
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
447
    def extract(self, key):
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
448
        """Extract a key previously added to the compressor.
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
449
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
450
        :param key: The key to extract.
451
        :return: An iterable over bytes and the sha1.
452
        """
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
453
        delta_details = self.labels_deltas[key]
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
454
        delta_chunks = self.lines[delta_details[0][1]:delta_details[1][1]]
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
455
        stored_bytes = ''.join(delta_chunks)
456
        # TODO: Fix this, we shouldn't really be peeking here
457
        entry = self._block._entries[key]
458
        if entry.type == 'fulltext':
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
459
            if stored_bytes[0] != 'f':
460
                raise ValueError('Index claimed fulltext, but stored bytes'
461
                                 ' indicate %s' % (stored_bytes[0],))
0.17.36 by John Arbash Meinel
Adding a mini-len to the delta/fulltext bytes
462
            fulltext_len, offset = decode_base128_int(stored_bytes[1:10])
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
463
            if fulltext_len + 1 + offset != len(stored_bytes):
464
                raise ValueError('Index claimed fulltext len, but stored bytes'
465
                                 ' claim %s != %s'
466
                                 % (len(stored_bytes),
467
                                    fulltext_len + 1 + offset))
0.17.36 by John Arbash Meinel
Adding a mini-len to the delta/fulltext bytes
468
            bytes = stored_bytes[offset + 1:]
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
469
        else:
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
470
            if entry.type != 'delta':
471
                raise ValueError('Unknown entry type: %s' % (entry.type,))
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
472
            # XXX: This is inefficient at best
473
            source = ''.join(self.lines)
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
474
            if stored_bytes[0] != 'd':
475
                raise ValueError('Entry type claims delta, bytes claim %s'
476
                                 % (stored_bytes[0],))
0.17.36 by John Arbash Meinel
Adding a mini-len to the delta/fulltext bytes
477
            delta_len, offset = decode_base128_int(stored_bytes[1:10])
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
478
            if delta_len + 1 + offset != len(stored_bytes):
479
                raise ValueError('Index claimed delta len, but stored bytes'
480
                                 ' claim %s != %s'
481
                                 % (len(stored_bytes),
482
                                    delta_len + 1 + offset))
0.17.36 by John Arbash Meinel
Adding a mini-len to the delta/fulltext bytes
483
            bytes = _groupcompress_pyx.apply_delta(source,
484
                                                   stored_bytes[offset + 1:])
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
485
        bytes_sha1 = sha_string(bytes)
486
        if entry.sha1 != bytes_sha1:
487
            raise ValueError('Recorded sha1 != measured %s != %s'
488
                             % (entry.sha1, bytes_sha1))
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
489
        return bytes, entry.sha1
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
490
491
    def output_chunks(self, new_chunks):
492
        """Output some chunks.
493
494
        :param new_chunks: The chunks to output.
495
        """
0.25.10 by John Arbash Meinel
Play around with detecting compression breaks.
496
        self._last = (len(self.lines), self.endpoint)
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
497
        endpoint = self.endpoint
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
498
        self.lines.extend(new_chunks)
499
        endpoint += sum(map(len, new_chunks))
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
500
        self.endpoint = endpoint
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
501
0.25.10 by John Arbash Meinel
Play around with detecting compression breaks.
502
    def pop_last(self):
503
        """Call this if you want to 'revoke' the last compression.
504
505
        After this, the data structures will be rolled back, but you cannot do
506
        more compression.
507
        """
508
        self._delta_index = None
509
        del self.lines[self._last[0]:]
510
        self.endpoint = self._last[1]
511
        self._last = None
512
0.17.2 by Robert Collins
Core proof of concept working.
513
    def ratio(self):
514
        """Return the overall compression ratio."""
515
        return float(self.input_bytes) / float(self.endpoint)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
516
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
517
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
518
def make_pack_factory(graph, delta, keylength):
519
    """Create a factory for creating a pack based groupcompress.
520
521
    This is only functional enough to run interface tests, it doesn't try to
522
    provide a full pack environment.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
523
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
524
    :param graph: Store a graph.
525
    :param delta: Delta compress contents.
526
    :param keylength: How long should keys be.
527
    """
528
    def factory(transport):
3735.32.2 by John Arbash Meinel
The 'delta' flag has no effect on the content (all GC is delta'd),
529
        parents = graph
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
530
        ref_length = 0
531
        if graph:
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
532
            ref_length = 1
0.17.7 by Robert Collins
Update for current index2 changes.
533
        graph_index = BTreeBuilder(reference_lists=ref_length,
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
534
            key_elements=keylength)
535
        stream = transport.open_write_stream('newpack')
536
        writer = pack.ContainerWriter(stream.write)
537
        writer.begin()
538
        index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
0.17.9 by Robert Collins
Initial stab at repository format support.
539
            add_callback=graph_index.add_nodes)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
540
        access = _DirectPackAccess({})
541
        access.set_writer(writer, graph_index, (transport, 'newpack'))
0.17.2 by Robert Collins
Core proof of concept working.
542
        result = GroupCompressVersionedFiles(index, access, delta)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
543
        result.stream = stream
544
        result.writer = writer
545
        return result
546
    return factory
547
548
549
def cleanup_pack_group(versioned_files):
0.17.23 by Robert Collins
Only decompress as much of the zlib data as is needed to read the text recipe.
550
    versioned_files.writer.end()
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
551
    versioned_files.stream.close()
552
553
554
class GroupCompressVersionedFiles(VersionedFiles):
555
    """A group-compress based VersionedFiles implementation."""
556
0.17.2 by Robert Collins
Core proof of concept working.
557
    def __init__(self, index, access, delta=True):
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
558
        """Create a GroupCompressVersionedFiles object.
559
560
        :param index: The index object storing access and graph data.
561
        :param access: The access object storing raw data.
0.17.2 by Robert Collins
Core proof of concept working.
562
        :param delta: Whether to delta compress or just entropy compress.
563
        """
564
        self._index = index
565
        self._access = access
566
        self._delta = delta
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
567
        self._unadded_refs = {}
0.17.24 by Robert Collins
Add a group cache to decompression, 5 times faster than knit at decompression when accessing everything in a group.
568
        self._group_cache = LRUSizeCache(max_size=50*1024*1024)
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
569
        self._fallback_vfs = []
0.17.2 by Robert Collins
Core proof of concept working.
570
571
    def add_lines(self, key, parents, lines, parent_texts=None,
572
        left_matching_blocks=None, nostore_sha=None, random_id=False,
573
        check_content=True):
574
        """Add a text to the store.
575
576
        :param key: The key tuple of the text to add.
577
        :param parents: The parents key tuples of the text to add.
578
        :param lines: A list of lines. Each line must be a bytestring. And all
579
            of them except the last must be terminated with \n and contain no
580
            other \n's. The last line may either contain no \n's or a single
581
            terminating \n. If the lines list does meet this constraint the add
582
            routine may error or may succeed - but you will be unable to read
583
            the data back accurately. (Checking the lines have been split
584
            correctly is expensive and extremely unlikely to catch bugs so it
585
            is not done at runtime unless check_content is True.)
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
586
        :param parent_texts: An optional dictionary containing the opaque
0.17.2 by Robert Collins
Core proof of concept working.
587
            representations of some or all of the parents of version_id to
588
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
589
            returned by add_lines or data corruption can be caused.
590
        :param left_matching_blocks: a hint about which areas are common
591
            between the text and its left-hand-parent.  The format is
592
            the SequenceMatcher.get_matching_blocks format.
593
        :param nostore_sha: Raise ExistingContent and do not add the lines to
594
            the versioned file if the digest of the lines matches this.
595
        :param random_id: If True a random id has been selected rather than
596
            an id determined by some deterministic process such as a converter
597
            from a foreign VCS. When True the backend may choose not to check
598
            for uniqueness of the resulting key within the versioned file, so
599
            this should only be done when the result is expected to be unique
600
            anyway.
601
        :param check_content: If True, the lines supplied are verified to be
602
            bytestrings that are correctly formed lines.
603
        :return: The text sha1, the number of bytes in the text, and an opaque
604
                 representation of the inserted version which can be provided
605
                 back to future add_lines calls in the parent_texts dictionary.
606
        """
607
        self._index._check_write_ok()
608
        self._check_add(key, lines, random_id, check_content)
609
        if parents is None:
610
            # The caller might pass None if there is no graph data, but kndx
611
            # indexes can't directly store that, so we give them
612
            # an empty tuple instead.
613
            parents = ()
614
        # double handling for now. Make it work until then.
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
615
        length = sum(map(len, lines))
616
        record = ChunkedContentFactory(key, parents, None, lines)
3735.31.12 by John Arbash Meinel
Push nostore_sha down through the stack.
617
        sha1 = list(self._insert_record_stream([record], random_id=random_id,
618
                                               nostore_sha=nostore_sha))[0]
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
619
        return sha1, length, None
0.17.2 by Robert Collins
Core proof of concept working.
620
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
621
    def add_fallback_versioned_files(self, a_versioned_files):
622
        """Add a source of texts for texts not present in this knit.
623
624
        :param a_versioned_files: A VersionedFiles object.
625
        """
626
        self._fallback_vfs.append(a_versioned_files)
627
0.17.4 by Robert Collins
Annotate.
628
    def annotate(self, key):
629
        """See VersionedFiles.annotate."""
630
        graph = Graph(self)
0.17.5 by Robert Collins
nograph tests completely passing.
631
        parent_map = self.get_parent_map([key])
632
        if not parent_map:
633
            raise errors.RevisionNotPresent(key, self)
634
        if parent_map[key] is not None:
635
            search = graph._make_breadth_first_searcher([key])
636
            keys = set()
637
            while True:
638
                try:
639
                    present, ghosts = search.next_with_ghosts()
640
                except StopIteration:
641
                    break
642
                keys.update(present)
643
            parent_map = self.get_parent_map(keys)
644
        else:
645
            keys = [key]
646
            parent_map = {key:()}
0.17.4 by Robert Collins
Annotate.
647
        head_cache = _mod_graph.FrozenHeadsCache(graph)
648
        parent_cache = {}
649
        reannotate = annotate.reannotate
650
        for record in self.get_record_stream(keys, 'topological', True):
651
            key = record.key
0.20.2 by John Arbash Meinel
Teach groupcompress about 'chunked' encoding
652
            chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
0.17.4 by Robert Collins
Annotate.
653
            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
654
            parent_cache[key] = list(
0.20.21 by John Arbash Meinel
Merge the chk sorting code.
655
                reannotate(parent_lines, chunks, key, None, head_cache))
0.17.4 by Robert Collins
Annotate.
656
        return parent_cache[key]
657
0.17.5 by Robert Collins
nograph tests completely passing.
658
    def check(self, progress_bar=None):
659
        """See VersionedFiles.check()."""
660
        keys = self.keys()
661
        for record in self.get_record_stream(keys, 'unordered', True):
662
            record.get_bytes_as('fulltext')
663
0.17.2 by Robert Collins
Core proof of concept working.
664
    def _check_add(self, key, lines, random_id, check_content):
665
        """check that version_id and lines are safe to add."""
666
        version_id = key[-1]
0.17.26 by Robert Collins
Working better --gc-plain-chk.
667
        if version_id is not None:
668
            if contains_whitespace(version_id):
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
669
                raise errors.InvalidRevisionId(version_id, self)
0.17.2 by Robert Collins
Core proof of concept working.
670
        self.check_not_reserved_id(version_id)
671
        # TODO: If random_id==False and the key is already present, we should
672
        # probably check that the existing content is identical to what is
673
        # being inserted, and otherwise raise an exception.  This would make
674
        # the bundle code simpler.
675
        if check_content:
676
            self._check_lines_not_unicode(lines)
677
            self._check_lines_are_lines(lines)
678
0.17.5 by Robert Collins
nograph tests completely passing.
679
    def get_parent_map(self, keys):
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
680
        """Get a map of the graph parents of keys.
0.17.5 by Robert Collins
nograph tests completely passing.
681
682
        :param keys: The keys to look up parents for.
683
        :return: A mapping from keys to parents. Absent keys are absent from
684
            the mapping.
685
        """
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
686
        return self._get_parent_map_with_sources(keys)[0]
687
688
    def _get_parent_map_with_sources(self, keys):
689
        """Get a map of the parents of keys.
690
691
        :param keys: The keys to look up parents for.
692
        :return: A tuple. The first element is a mapping from keys to parents.
693
            Absent keys are absent from the mapping. The second element is a
694
            list with the locations each key was found in. The first element
695
            is the in-this-knit parents, the second the first fallback source,
696
            and so on.
697
        """
0.17.5 by Robert Collins
nograph tests completely passing.
698
        result = {}
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
699
        sources = [self._index] + self._fallback_vfs
0.17.5 by Robert Collins
nograph tests completely passing.
700
        source_results = []
701
        missing = set(keys)
702
        for source in sources:
703
            if not missing:
704
                break
705
            new_result = source.get_parent_map(missing)
706
            source_results.append(new_result)
707
            result.update(new_result)
708
            missing.difference_update(set(new_result))
3735.31.7 by John Arbash Meinel
Start bringing in stacking support for Groupcompress repos.
709
        return result, source_results
0.17.5 by Robert Collins
nograph tests completely passing.
710
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
711
    def _get_block(self, index_memo):
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.
712
        read_memo = index_memo[0:3]
713
        # get the group:
714
        try:
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
715
            block = self._group_cache[read_memo]
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.
716
        except KeyError:
717
            # read the group
718
            zdata = self._access.get_raw_records([read_memo]).next()
719
            # decompress - whole thing - this is not a bug, as it
720
            # permits caching. We might want to store the partially
721
            # decompresed group and decompress object, so that recent
722
            # texts are not penalised by big groups.
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
723
            block = GroupCompressBlock.from_bytes(zdata)
724
            self._group_cache[read_memo] = block
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.
725
        # cheapo debugging:
726
        # print len(zdata), len(plain)
727
        # parse - requires split_lines, better to have byte offsets
728
        # here (but not by much - we only split the region for the
729
        # recipe, and we often want to end up with lines anyway.
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
730
        return block
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.
731
0.20.18 by John Arbash Meinel
Implement new handling of get_bytes_as(), and get_missing_compression_parent_keys()
732
    def get_missing_compression_parent_keys(self):
733
        """Return the keys of missing compression parents.
734
735
        Missing compression parents occur when a record stream was missing
736
        basis texts, or a index was scanned that had missing basis texts.
737
        """
738
        # GroupCompress cannot currently reference texts that are not in the
739
        # group, so this is valid for now
740
        return frozenset()
741
0.17.5 by Robert Collins
nograph tests completely passing.
742
    def get_record_stream(self, keys, ordering, include_delta_closure):
743
        """Get a stream of records for keys.
744
745
        :param keys: The keys to include.
746
        :param ordering: Either 'unordered' or 'topological'. A topologically
747
            sorted stream has compression parents strictly before their
748
            children.
749
        :param include_delta_closure: If True then the closure across any
750
            compression parents will be included (in the opaque data).
751
        :return: An iterator of ContentFactory objects, each of which is only
752
            valid until the iterator is advanced.
753
        """
754
        # keys might be a generator
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
755
        orig_keys = list(keys)
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
756
        keys = set(keys)
0.17.5 by Robert Collins
nograph tests completely passing.
757
        if not keys:
758
            return
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
759
        if (not self._index.has_graph
3735.31.14 by John Arbash Meinel
Change the gc-optimal to 'groupcompress'
760
            and ordering in ('topological', 'groupcompress')):
0.17.5 by Robert Collins
nograph tests completely passing.
761
            # Cannot topological order when no graph has been stored.
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
762
            # but we allow 'as-requested' or 'unordered'
0.17.5 by Robert Collins
nograph tests completely passing.
763
            ordering = 'unordered'
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
764
765
        remaining_keys = keys
766
        while True:
767
            try:
768
                keys = set(remaining_keys)
769
                for content_factory in self._get_remaining_record_stream(keys,
770
                        orig_keys, ordering, include_delta_closure):
771
                    remaining_keys.discard(content_factory.key)
772
                    yield content_factory
773
                return
774
            except errors.RetryWithNewPacks, e:
775
                self._access.reload_or_raise(e)
776
777
    def _find_from_fallback(self, missing):
778
        """Find whatever keys you can from the fallbacks.
779
780
        :param missing: A set of missing keys. This set will be mutated as keys
781
            are found from a fallback_vfs
782
        :return: (parent_map, key_to_source_map, source_results)
783
            parent_map  the overall key => parent_keys
784
            key_to_source_map   a dict from {key: source}
785
            source_results      a list of (source: keys)
786
        """
787
        parent_map = {}
788
        key_to_source_map = {}
789
        source_results = []
790
        for source in self._fallback_vfs:
791
            if not missing:
792
                break
793
            source_parents = source.get_parent_map(missing)
794
            parent_map.update(source_parents)
795
            source_parents = list(source_parents)
796
            source_results.append((source, source_parents))
797
            key_to_source_map.update((key, source) for key in source_parents)
798
            missing.difference_update(source_parents)
799
        return parent_map, key_to_source_map, source_results
800
801
    def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
802
        """Get the (source, [keys]) list.
803
804
        The returned objects should be in the order defined by 'ordering',
805
        which can weave between different sources.
806
        :param ordering: Must be one of 'topological' or 'groupcompress'
807
        :return: List of [(source, [keys])] tuples, such that all keys are in
808
            the defined order, regardless of source.
809
        """
810
        if ordering == 'topological':
811
            present_keys = topo_sort(parent_map)
812
        else:
813
            # ordering == 'groupcompress'
814
            # XXX: This only optimizes for the target ordering. We may need
815
            #      to balance that with the time it takes to extract
816
            #      ordering, by somehow grouping based on
817
            #      locations[key][0:3]
818
            present_keys = sort_gc_optimal(parent_map)
819
        # Now group by source:
820
        source_keys = []
821
        current_source = None
822
        for key in present_keys:
823
            source = key_to_source_map.get(key, self)
824
            if source is not current_source:
825
                source_keys.append((source, []))
3735.2.151 by John Arbash Meinel
A the source grouping code needs to update current_source
826
                current_source = source
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
827
            source_keys[-1][1].append(key)
828
        return source_keys
829
830
    def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,
831
                                      key_to_source_map):
832
        source_keys = []
833
        current_source = None
834
        for key in orig_keys:
835
            if key in locations or key in unadded_keys:
836
                source = self
837
            elif key in key_to_source_map:
838
                source = key_to_source_map[key]
839
            else: # absent
840
                continue
841
            if source is not current_source:
842
                source_keys.append((source, []))
3735.2.151 by John Arbash Meinel
A the source grouping code needs to update current_source
843
                current_source = source
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
844
            source_keys[-1][1].append(key)
845
        return source_keys
846
847
    def _get_io_ordered_source_keys(self, locations, unadded_keys,
848
                                    source_result):
849
        def get_group(key):
850
            # This is the group the bytes are stored in, followed by the
851
            # location in the group
852
            return locations[key][0]
853
        present_keys = sorted(locations.iterkeys(), key=get_group)
854
        # We don't have an ordering for keys in the in-memory object, but
855
        # lets process the in-memory ones first.
856
        present_keys = list(unadded_keys) + present_keys
857
        # Now grab all of the ones from other sources
858
        source_keys = [(self, present_keys)]
859
        source_keys.extend(source_result)
860
        return source_keys
861
862
    def _get_remaining_record_stream(self, keys, orig_keys, ordering,
863
                                     include_delta_closure):
864
        """Get a stream of records for keys.
865
866
        :param keys: The keys to include.
867
        :param ordering: one of 'unordered', 'topological', 'groupcompress' or
868
            'as-requested'
869
        :param include_delta_closure: If True then the closure across any
870
            compression parents will be included (in the opaque data).
871
        :return: An iterator of ContentFactory objects, each of which is only
872
            valid until the iterator is advanced.
873
        """
0.17.5 by Robert Collins
nograph tests completely passing.
874
        # Cheap: iterate
875
        locations = self._index.get_build_details(keys)
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
876
        unadded_keys = set(self._unadded_refs).intersection(keys)
877
        missing = keys.difference(locations)
878
        missing.difference_update(unadded_keys)
879
        (fallback_parent_map, key_to_source_map,
880
         source_result) = self._find_from_fallback(missing)
881
        if ordering in ('topological', 'groupcompress'):
0.17.5 by Robert Collins
nograph tests completely passing.
882
            # would be better to not globally sort initially but instead
883
            # start with one key, recurse to its oldest parent, then grab
884
            # everything in the same group, etc.
885
            parent_map = dict((key, details[2]) for key, details in
886
                locations.iteritems())
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
887
            for key in unadded_keys:
888
                parent_map[key] = self._unadded_refs[key]
889
            parent_map.update(fallback_parent_map)
890
            source_keys = self._get_ordered_source_keys(ordering, parent_map,
891
                                                        key_to_source_map)
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
892
        elif ordering == 'as-requested':
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
893
            source_keys = self._get_as_requested_source_keys(orig_keys,
894
                locations, unadded_keys, key_to_source_map)
0.17.5 by Robert Collins
nograph tests completely passing.
895
        else:
0.20.10 by John Arbash Meinel
Change the extraction ordering for 'unordered'.
896
            # We want to yield the keys in a semi-optimal (read-wise) ordering.
897
            # Otherwise we thrash the _group_cache and destroy performance
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
898
            source_keys = self._get_io_ordered_source_keys(locations,
899
                unadded_keys, source_result)
900
        for key in missing:
0.17.5 by Robert Collins
nograph tests completely passing.
901
            yield AbsentContentFactory(key)
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
902
        for source, keys in source_keys:
903
            if source is self:
904
                for key in keys:
905
                    if key in self._unadded_refs:
906
                        bytes, sha1 = self._compressor.extract(key)
907
                        parents = self._unadded_refs[key]
908
                    else:
909
                        index_memo, _, parents, (method, _) = locations[key]
910
                        block = self._get_block(index_memo)
911
                        entry, bytes = block.extract(key, index_memo)
912
                        sha1 = entry.sha1
913
                        # TODO: If we don't have labels, then the sha1 here is computed
914
                        #       from the data, so we don't want to re-sha the string.
915
                        if not _FAST and sha_string(bytes) != sha1:
916
                            raise AssertionError('sha1 sum did not match')
917
                    yield FulltextContentFactory(key, parents, sha1, bytes)
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
918
            else:
3735.31.18 by John Arbash Meinel
Implement stacking support across all ordering implementations.
919
                for record in source.get_record_stream(keys, ordering,
920
                                                       include_delta_closure):
921
                    yield record
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
922
0.17.5 by Robert Collins
nograph tests completely passing.
923
    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s().

        :param keys: The keys to look up sha1 sums for.
        :return: A dict mapping each present key to its sha1; absent keys are
            omitted from the result.
        """
        result = {}
        for record in self.get_record_stream(keys, 'unordered', True):
            if record.sha1 is not None:
                result[record.key] = record.sha1
            else:
                if record.storage_kind != 'absent':
                    # BUG FIX: this line previously used '==' (a no-op
                    # comparison), so keys whose records carried no sha1 were
                    # silently left out of the result.
                    result[record.key] = sha_string(record.get_bytes_as(
                        'fulltext'))
        return result
934
0.17.2 by Robert Collins
Core proof of concept working.
935
    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFiles.get_record_stream:
        """
        inserted = self._insert_record_stream(stream)
        # _insert_record_stream is a generator yielding sha1s; this public
        # entry point only wants its insertion side effects, so exhaust it
        # and discard the yielded values.
        for _ignored in inserted:
            pass
0.17.2 by Robert Collins
Core proof of concept working.
944
3735.31.12 by John Arbash Meinel
Push nostore_sha down through the stack.
945
    def _insert_record_stream(self, stream, random_id=False, nostore_sha=None):
        """Internal core to insert a record stream into this container.

        This helper function has a different interface than insert_record_stream
        to allow add_lines to be minimal, but still return the needed data.

        :param stream: A stream of records to insert.
        :param random_id: If True, the keys being added were randomly
            generated, so no duplicate check is performed when adding the
            index records.
        :param nostore_sha: If the sha1 of a given text matches nostore_sha,
            raise ExistingContent, rather than committing the new text.
        :return: An iterator over the sha1 of the inserted records.
        :seealso insert_record_stream:
        :seealso add_lines:
        """
        # Cache of storage_kind -> 'fulltext' adapters, so we build each one
        # at most once for the whole stream.
        adapters = {}
        def get_adapter(adapter_key):
            try:
                return adapters[adapter_key]
            except KeyError:
                adapter_factory = adapter_registry.get(adapter_key)
                adapter = adapter_factory(self)
                adapters[adapter_key] = adapter
                return adapter
        # This will go up to fulltexts for gc to gc fetching, which isn't
        # ideal.
        self._compressor = GroupCompressor(self._delta)
        self._unadded_refs = {}
        keys_to_add = []
        basis_end = 0
        def flush():
            # Seal the current group: serialise the compressor's block,
            # append it as one raw record via _access, add index entries for
            # every key buffered in keys_to_add (recording the group's start
            # and length plus each key's offsets within it), then reset state
            # with a fresh compressor for the next group.
            bytes = self._compressor._block.to_bytes(
                ''.join(self._compressor.lines))
            index, start, length = self._access.add_raw_records(
                [(None, len(bytes))], bytes)[0]
            nodes = []
            for key, reads, refs in keys_to_add:
                nodes.append((key, "%d %d %s" % (start, length, reads), refs))
            self._index.add_records(nodes, random_id=random_id)
            self._unadded_refs = {}
            del keys_to_add[:]
            self._compressor = GroupCompressor(self._delta)

        # State for the group-break heuristics below.  'prefix' is
        # record.key[0] for multi-element keys (presumably the file-id —
        # texts sharing a prefix tend to compress well against each other),
        # so a prefix change is a natural point to start a new group.
        last_prefix = None
        last_fulltext_len = None
        max_fulltext_len = 0
        max_fulltext_prefix = None
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            try:
                bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                # The record cannot hand us a fulltext directly; convert its
                # native representation through a registered adapter.
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            if len(record.key) > 1:
                prefix = record.key[0]
                # Ask for a soft boundary while we stay within the same
                # prefix run.
                soft = (prefix == last_prefix)
            else:
                prefix = None
                soft = False
            if max_fulltext_len < len(bytes):
                max_fulltext_len = len(bytes)
                max_fulltext_prefix = prefix
            (found_sha1, end_point, type,
             length) = self._compressor.compress(record.key,
                bytes, record.sha1, soft=soft,
                nostore_sha=nostore_sha)
            # delta_ratio = float(len(bytes)) / length
            # Check if we want to continue to include that text
            if (prefix == max_fulltext_prefix
                and end_point < 2 * max_fulltext_len):
                # As long as we are on the same file_id, we will fill at least
                # 2 * max_fulltext_len
                start_new_block = False
            elif end_point > 4*1024*1024:
                # Hard cap: never let a group grow past 4MB.
                start_new_block = True
            elif (prefix is not None and prefix != last_prefix
                  and end_point > 2*1024*1024):
                # Switched prefix (file-id) with an already >2MB group: break.
                start_new_block = True
            else:
                start_new_block = False
            # NOTE: earlier, finer-grained break heuristics, kept for
            # reference while the thresholds above are being tuned:
            # if type == 'fulltext':
            #     # If this is the first text, we don't do anything
            #     if self._compressor.num_keys > 1:
            #         if prefix is not None and prefix != last_prefix:
            #             # We just inserted a fulltext for a different prefix
            #             # (aka file-id).
            #             if end_point > 512 * 1024:
            #                 start_new_block = True
            #             # TODO: Consider packing several small texts together
            #             #       maybe only flush if end_point > some threshold
            #             # if end_point > 512 * 1024 or len(bytes) <
            #             #     start_new_block = true
            #         else:
            #             # We just added a fulltext, part of the same file-id
            #             if (end_point > 2*1024*1024
            #                 and end_point > 5*max_fulltext_len):
            #                 start_new_block = True
            #     last_fulltext_len = len(bytes)
            # else:
            #     delta_ratio = float(len(bytes)) / length
            #     if delta_ratio < 3: # Not much compression
            #         if end_point > 1*1024*1024:
            #             start_new_block = True
            #     elif delta_ratio < 10: # 10:1 compression
            #         if end_point > 4*1024*1024:
            #             start_new_block = True
            last_prefix = prefix
            if start_new_block:
                # Undo the speculative compression of this record, flush the
                # finished group, then re-compress the record as the first
                # entry of the new group.  nostore_sha is not re-passed: the
                # check already ran (and passed) in the first compress call.
                self._compressor.pop_last()
                flush()
                basis_end = 0
                max_fulltext_len = len(bytes)
                (found_sha1, end_point, type,
                 length) = self._compressor.compress(record.key,
                    bytes, record.sha1)
                last_fulltext_len = length
            if record.key[-1] is None:
                # A trailing None in the key is a placeholder to be filled
                # with the content's sha1 (CHK-style naming).
                key = record.key[:-1] + ('sha1:' + found_sha1,)
            else:
                key = record.key
            self._unadded_refs[key] = record.parents
            yield found_sha1
            keys_to_add.append((key, '%d %d' % (basis_end, end_point),
                (record.parents,)))
            basis_end = end_point
        if len(keys_to_add):
            # Flush whatever is still buffered in the final, partial group.
            flush()
        self._compressor = None
0.17.5 by Robert Collins
nograph tests completely passing.
1075
1076
    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that
        line is present in (not introduced in).

        Results come back in whatever order suits the underlying storage
        format, and within that, line order is arbitrary. Lines are
        normalised by the store, so every line carries a \n terminator.

        If a progress bar is supplied it may be used to report progress; the
        caller is responsible for cleaning it up (this is an iterator).

        :return: An iterator over (line, key).
        """
        if pb is None:
            pb = progress.DummyProgress()
        wanted = set(keys)
        num_keys = len(wanted)
        # Inclusions are the caller's concern; we only need to visit each
        # requested record once, in any order.
        stream = self.get_record_stream(wanted, 'unordered', True)
        for idx, record in enumerate(stream):
            # XXX: todo - optimise to use less than full texts.
            record_key = record.key
            pb.update('Walking content', idx, num_keys)
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record_key, self)
            for line in split_lines(record.get_bytes_as('fulltext')):
                yield line, record_key
        pb.update('Walking content', num_keys, num_keys)
0.17.5 by Robert Collins
nograph tests completely passing.
1115
1116
    def keys(self):
        """See VersionedFiles.keys."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "keys scales with size of history")
        all_keys = set()
        # Union our own index's keys with those of every stacked fallback.
        for vf in [self._index] + self._fallback_vfs:
            all_keys.update(vf.keys())
        return all_keys
1125
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
1126
1127
class _GCGraphIndex(object):
1128
    """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
1129
0.17.9 by Robert Collins
Initial stab at repository format support.
1130
    def __init__(self, graph_index, is_locked, parents=True,
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
1131
        add_callback=None):
1132
        """Construct a _GCGraphIndex on a graph_index.
1133
1134
        :param graph_index: An implementation of bzrlib.index.GraphIndex.
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
1135
        :param is_locked: A callback, returns True if the index is locked and
1136
            thus usable.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1137
        :param parents: If True, record knits parents, if not do not record
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
1138
            parents.
1139
        :param add_callback: If not None, allow additions to the index and call
1140
            this callback with a list of added GraphIndex nodes:
1141
            [(node, value, node_refs), ...]
1142
        """
1143
        self._add_callback = add_callback
1144
        self._graph_index = graph_index
1145
        self._parents = parents
1146
        self.has_graph = parents
1147
        self._is_locked = is_locked
1148
0.17.5 by Robert Collins
nograph tests completely passing.
1149
    def add_records(self, records, random_id=False):
1150
        """Add multiple records to the index.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1151
0.17.5 by Robert Collins
nograph tests completely passing.
1152
        This function does not insert data into the Immutable GraphIndex
1153
        backing the KnitGraphIndex, instead it prepares data for insertion by
1154
        the caller and checks that it is safe to insert then calls
1155
        self._add_callback with the prepared GraphIndex nodes.
1156
1157
        :param records: a list of tuples:
1158
                         (key, options, access_memo, parents).
1159
        :param random_id: If True the ids being added were randomly generated
1160
            and no check for existence will be performed.
1161
        """
1162
        if not self._add_callback:
1163
            raise errors.ReadOnlyError(self)
1164
        # we hope there are no repositories with inconsistent parentage
1165
        # anymore.
1166
1167
        changed = False
1168
        keys = {}
1169
        for (key, value, refs) in records:
1170
            if not self._parents:
1171
                if refs:
1172
                    for ref in refs:
1173
                        if ref:
1174
                            raise KnitCorrupt(self,
1175
                                "attempt to add node with parents "
1176
                                "in parentless index.")
1177
                    refs = ()
1178
                    changed = True
1179
            keys[key] = (value, refs)
1180
        # check for dups
1181
        if not random_id:
1182
            present_nodes = self._get_entries(keys)
1183
            for (index, key, value, node_refs) in present_nodes:
1184
                if node_refs != keys[key][1]:
1185
                    raise errors.KnitCorrupt(self, "inconsistent details in add_records"
1186
                        ": %s %s" % ((value, node_refs), keys[key]))
1187
                del keys[key]
1188
                changed = True
1189
        if changed:
1190
            result = []
1191
            if self._parents:
1192
                for key, (value, node_refs) in keys.iteritems():
1193
                    result.append((key, value, node_refs))
1194
            else:
1195
                for key, (value, node_refs) in keys.iteritems():
1196
                    result.append((key, value))
1197
            records = result
1198
        self._add_callback(records)
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1199
0.17.5 by Robert Collins
nograph tests completely passing.
1200
    def _check_read(self):
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
1201
        """Raise an exception if reads are not permitted."""
0.17.5 by Robert Collins
nograph tests completely passing.
1202
        if not self._is_locked():
1203
            raise errors.ObjectNotLocked(self)
1204
0.17.2 by Robert Collins
Core proof of concept working.
1205
    def _check_write_ok(self):
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
1206
        """Raise an exception if writes are not permitted."""
0.17.2 by Robert Collins
Core proof of concept working.
1207
        if not self._is_locked():
1208
            raise errors.ObjectNotLocked(self)
1209
0.17.5 by Robert Collins
nograph tests completely passing.
1210
    def _get_entries(self, keys, check_present=False):
1211
        """Get the entries for keys.
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
1212
1213
        Note: Callers are responsible for checking that the index is locked
1214
        before calling this method.
1215
0.17.5 by Robert Collins
nograph tests completely passing.
1216
        :param keys: An iterable of index key tuples.
1217
        """
1218
        keys = set(keys)
1219
        found_keys = set()
1220
        if self._parents:
1221
            for node in self._graph_index.iter_entries(keys):
1222
                yield node
1223
                found_keys.add(node[1])
1224
        else:
1225
            # adapt parentless index to the rest of the code.
1226
            for node in self._graph_index.iter_entries(keys):
1227
                yield node[0], node[1], node[2], ()
1228
                found_keys.add(node[1])
1229
        if check_present:
1230
            missing_keys = keys.difference(found_keys)
1231
            if missing_keys:
1232
                raise RevisionNotPresent(missing_keys.pop(), self)
1233
1234
    def get_parent_map(self, keys):
1235
        """Get a map of the parents of keys.
1236
1237
        :param keys: The keys to look up parents for.
1238
        :return: A mapping from keys to parents. Absent keys are absent from
1239
            the mapping.
1240
        """
1241
        self._check_read()
1242
        nodes = self._get_entries(keys)
1243
        result = {}
1244
        if self._parents:
1245
            for node in nodes:
1246
                result[node[1]] = node[3][0]
1247
        else:
1248
            for node in nodes:
1249
                result[node[1]] = None
1250
        return result
1251
1252
    def get_build_details(self, keys):
1253
        """Get the various build details for keys.
1254
1255
        Ghosts are omitted from the result.
1256
1257
        :param keys: An iterable of keys.
1258
        :return: A dict of key:
1259
            (index_memo, compression_parent, parents, record_details).
1260
            index_memo
1261
                opaque structure to pass to read_records to extract the raw
1262
                data
1263
            compression_parent
1264
                Content that this record is built upon, may be None
1265
            parents
1266
                Logical parents of this node
1267
            record_details
1268
                extra information about the content which needs to be passed to
1269
                Factory.parse_record
1270
        """
1271
        self._check_read()
1272
        result = {}
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
1273
        entries = self._get_entries(keys)
0.17.5 by Robert Collins
nograph tests completely passing.
1274
        for entry in entries:
1275
            key = entry[1]
1276
            if not self._parents:
1277
                parents = None
1278
            else:
1279
                parents = entry[3][0]
1280
            method = 'group'
1281
            result[key] = (self._node_to_position(entry),
1282
                                  None, parents, (method, None))
1283
        return result
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1284
0.17.5 by Robert Collins
nograph tests completely passing.
1285
    def keys(self):
1286
        """Get all the keys in the collection.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1287
0.17.5 by Robert Collins
nograph tests completely passing.
1288
        The keys are not ordered.
1289
        """
1290
        self._check_read()
1291
        return [node[1] for node in self._graph_index.iter_all_entries()]
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1292
0.17.5 by Robert Collins
nograph tests completely passing.
1293
    def _node_to_position(self, node):
1294
        """Convert an index value to position details."""
1295
        bits = node[2].split(' ')
1296
        # It would be nice not to read the entire gzip.
1297
        start = int(bits[0])
1298
        stop = int(bits[1])
1299
        basis_end = int(bits[2])
1300
        delta_end = int(bits[3])
1301
        return node[0], start, stop, basis_end, delta_end
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
1302
1303
1304
try:
    # Optional compiled (pyrex/C) extension with accelerated group-compress
    # routines.  Fall back silently to the pure-Python code paths when the
    # extension is not built (e.g. a source-only install).
    from bzrlib import _groupcompress_pyx
except ImportError:
    pass