# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    config,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios
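

# Test multiplication is handed to ``load_tests_apply_scenarios`` below:
# every test in a class that defines a ``scenarios`` attribute is run once
# per scenario, with the scenario's attributes (here, the ``compressor``
# class) set on the test instance.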
load_tests = load_tests_apply_scenarios


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
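        # A decoding note (inferred from the expected bytes above, not from
        # a separate spec): a fulltext record is the kind byte 'f' followed
        # by the content length as a base-128 varint -- '\x0f' is 15, the
        # length of 'strange\ncommon\n' -- followed by the raw text.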

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))
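        # The texts above share a long run on purpose: as their own wording
        # suggests, the delta matcher only emits copy instructions for
        # matches of at least 16 bytes, so shorter overlaps would simply be
        # inserted literally.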

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
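        # A note on the copy instruction '\x91\x0a\x2c' (an inference from
        # the inline comments; the bit layout matches git-style binary
        # deltas): a command byte with the high bit set is a copy, its low
        # four bits say how many offset bytes follow and bits 4-6 how many
        # length bytes, so 0x91 reads one offset byte (0x0a) and one
        # length byte (0x2c).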

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
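        # The expected bytecodes here differ slightly from the Pyrex
        # version (e.g. '\x04new\n' vs. '\x03new', and different copy
        # offsets): the two implementations can pick different but
        # equivalent delta instructions, which is presumably why these
        # byte-exact tests live per-implementation rather than in
        # TestAllGroupCompressors.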


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
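        # To summarize the wire format exercised above: a block is the
        # magic 'gcb1z\n', the compressed length and the uncompressed
        # length as decimal ASCII (each '\n'-terminated), then the zlib
        # stream. Parsing stays lazy: from_bytes() records the lengths but
        # leaves _content as None until _ensure_content() decompresses it.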

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
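        # The partial reads above presumably ride on zlib's incremental
        # decompressobj (held in the _z_content_decompressor attribute):
        # _ensure_content(n) feeds it only enough compressed input to reach
        # n decompressed bytes, and drops it once the output is complete.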

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
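        # Reading _dump()'s output (as asserted above): ('f', length) is a
        # fulltext record; a delta record carries two lengths (apparently
        # the encoded delta size, 21 here, and the expanded content size)
        # plus its instructions, where ('c', offset, length) is a copy and
        # ('i', length, text) is an insert with the text elided.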


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index
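        # Note: the node values like '2 78 2 10' are opaque index payloads
        # (presumably byte offsets and lengths into a group block); these
        # tests only exercise the reference lists, so the exact numbers do
        # not matter here.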

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_entries_per_source_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = vf.get_record_stream([('a',)], 'unordered', True).next()
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
                         record._manager._get_max_entries_per_source())

    def test_get_record_stream_accesses_max_entries_per_source_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_entries_per_source = 1234
        record = vf.get_record_stream([('a',)], 'unordered', True).next()
        self.assertEqual(1234, record._manager._get_max_entries_per_source())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
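        # On the storage kinds checked above: the first record served from
        # a group carries the whole compressed block
        # ('groupcompress-block'), and later records from the same group
        # are lightweight references to it ('groupcompress-block-ref') --
        # which is what lets a target repository reuse the block verbatim
        # instead of recompressing each text.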

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())
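        # With track_external_parent_refs=True the index remembers every
        # parent key it has seen referenced but never added, so
        # get_missing_parents() can report keys living outside this index
        # (presumably used during fetch to detect incomplete streams).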
737
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
738
    def make_source_with_b(self, a_parent, path):
739
        source = self.make_test_vf(True, dir=path)
740
        source.add_lines(('a',), (), ['lines\n'])
741
        if a_parent:
742
            b_parents = (('a',),)
743
        else:
744
            b_parents = ()
745
        source.add_lines(('b',), b_parents, ['lines\n'])
746
        return source
747
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
748
    def do_inconsistent_inserts(self, inconsistency_fatal):
749
        target = self.make_test_vf(True, dir='target',
750
                                   inconsistency_fatal=inconsistency_fatal)
751
        for x in range(2):
752
            source = self.make_source_with_b(x == 1, 'source%s' % x)
753
            target.insert_record_stream(source.get_record_stream(
754
                [('b',)], 'unordered', False))
755
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
756
    def test_inconsistent_redundant_inserts_warn(self):
4465.2.2 by Aaron Bentley
Add test that duplicates are skipped.
757
        """Should not insert a record that is already present."""
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
758
        warnings = []
759
        def warning(template, args):
760
            warnings.append(template % args)
761
        _trace_warning = trace.warning
762
        trace.warning = warning
763
        try:
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
764
            self.do_inconsistent_inserts(inconsistency_fatal=False)
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
765
        finally:
766
            trace.warning = _trace_warning
767
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
768
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
769
                         warnings)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
770
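The try/finally dance above can also be written with TestCase.overrideAttr,
which registers a cleanup that restores the attribute automatically. A sketch
of the equivalent test body, assuming overrideAttr is available in this
version of the test framework:

    warnings = []
    def warning(template, args):
        warnings.append(template % args)
    self.overrideAttr(trace, 'warning', warning)
    self.do_inconsistent_inserts(inconsistency_fatal=False)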
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
771
    def test_inconsistent_redundant_inserts_raises(self):
772
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
773
                              inconsistency_fatal=True)
774
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
775
                              " in add_records:"
776
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
777
                              " 0 8', \(\(\('a',\),\),\)\)")
778
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
779
    def test_clear_cache(self):
780
        vf = self.make_source_with_b(True, 'source')
781
        vf.writer.end()
782
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
783
                                           True):
784
            pass
785
        self.assertTrue(len(vf._group_cache) > 0)
786
        vf.clear_cache()
787
        self.assertEqual(0, len(vf._group_cache))
788
789
5755.2.4 by John Arbash Meinel
Expose the max_entries_per_source into GroupCompressVersionedFiles
790
class TestGroupCompressConfig(tests.TestCaseWithTransport):
791
792
    def make_test_vf(self):
793
        t = self.get_transport('.')
794
        t.ensure_base()
795
        factory = groupcompress.make_pack_factory(graph=True,
796
            delta=False, keylength=1, inconsistency_fatal=True)
797
        vf = factory(t)
798
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
799
        return vf
800
801
    def test_max_entries_per_source_default(self):
802
        vf = self.make_test_vf()
803
        gc = vf._make_group_compressor()
804
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
805
                         vf._max_entries_per_source)
806
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
807
            self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
808
                             gc._delta_index._max_entries_per_source)
809
810
    def test_max_entries_per_source_in_config(self):
811
        c = config.GlobalConfig()
812
        c.set_user_option('bzr.groupcompress.max_entries_per_source', '10000')
813
        vf = self.make_test_vf()
814
        gc = vf._make_group_compressor()
815
        self.assertEqual(10000, vf._max_entries_per_source)
816
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
817
            self.assertEqual(10000, gc._delta_index._max_entries_per_source)
818
819
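For reference, GlobalConfig is backed by the user's bazaar.conf, so the option
set above corresponds to a plain config entry like the following (illustrative
value only):

    [DEFAULT]
    bzr.groupcompress.max_entries_per_source = 10000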
    def test_max_entries_per_source_bad_config(self):
820
        c = config.GlobalConfig()
821
        c.set_user_option('bzr.groupcompress.max_entries_per_source', 'boogah')
822
        vf = self.make_test_vf()
823
        # TODO: This triggers a warning; we might want to trap it and make
824
        #       sure it is readable.
825
        gc = vf._make_group_compressor()
826
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
827
                         vf._max_entries_per_source)
828
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
829
            self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
830
                             gc._delta_index._max_entries_per_source)
831
832
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
833
class StubGCVF(object):
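    """Minimal stand-in for a GroupCompressVersionedFiles.

    Only the two attributes that _BatchingBlockFetcher needs here are
    provided: a _group_cache dict and a _get_blocks method.
    """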
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
834
    def __init__(self, canned_get_blocks=None):
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
835
        self._group_cache = {}
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
836
        self._canned_get_blocks = canned_get_blocks or []
837
    def _get_blocks(self, read_memos):
838
        return iter(self._canned_get_blocks)
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
839
840
841
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
842
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
843
844
    def test_add_key_new_read_memo(self):
845
        """Adding a key with an uncached read_memo new to this batch adds that
846
        read_memo to the list of memos to fetch.
847
        """
848
        # locations are: index_memo, ignored, parents, ignored
849
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
850
        # and (idx, offset, size) is known as the 'read_memo', identifying the
851
        # raw bytes needed.
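        # For example (values invented for illustration): an index_memo of
        # ('fake index', 100, 50, 0, 5) has the read_memo
        # ('fake index', 100, 50), i.e. read_memo == index_memo[0:3].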
852
        read_memo = ('fake index', 100, 50)
853
        locations = {
854
            ('key',): (read_memo + (None, None), None, None, None)}
855
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
856
        total_size = batcher.add_key(('key',))
857
        self.assertEqual(50, total_size)
858
        self.assertEqual([('key',)], batcher.keys)
859
        self.assertEqual([read_memo], batcher.memos_to_get)
860
861
    def test_add_key_duplicate_read_memo(self):
862
        """read_memos that occur multiple times in a batch will only be fetched
863
        once.
864
        """
865
        read_memo = ('fake index', 100, 50)
866
        # Two keys, both sharing the same read memo (but different overall
867
        # index_memos).
868
        locations = {
869
            ('key1',): (read_memo + (0, 1), None, None, None),
870
            ('key2',): (read_memo + (1, 2), None, None, None)}
871
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
872
        total_size = batcher.add_key(('key1',))
873
        total_size = batcher.add_key(('key2',))
874
        self.assertEqual(50, total_size)
875
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
876
        self.assertEqual([read_memo], batcher.memos_to_get)
877
878
    def test_add_key_cached_read_memo(self):
879
        """Adding a key with a cached read_memo will not cause that read_memo
880
        to be added to the list to fetch.
881
        """
882
        read_memo = ('fake index', 100, 50)
883
        gcvf = StubGCVF()
884
        gcvf._group_cache[read_memo] = 'fake block'
885
        locations = {
886
            ('key',): (read_memo + (None, None), None, None, None)}
887
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
888
        total_size = batcher.add_key(('key',))
889
        self.assertEqual(0, total_size)
890
        self.assertEqual([('key',)], batcher.keys)
891
        self.assertEqual([], batcher.memos_to_get)
892
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
893
    def test_yield_factories_empty(self):
894
        """An empty batch yields no factories."""
895
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
896
        self.assertEqual([], list(batcher.yield_factories()))
897
898
    def test_yield_factories_calls_get_blocks(self):
4634.3.22 by Andrew Bennetts
Fix docstring.
899
        """Uncached memos are retrieved via get_blocks."""
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
900
        read_memo1 = ('fake index', 100, 50)
901
        read_memo2 = ('fake index', 150, 40)
902
        gcvf = StubGCVF(
903
            canned_get_blocks=[
904
                (read_memo1, groupcompress.GroupCompressBlock()),
905
                (read_memo2, groupcompress.GroupCompressBlock())])
906
        locations = {
907
            ('key1',): (read_memo1 + (None, None), None, None, None),
908
            ('key2',): (read_memo2 + (None, None), None, None, None)}
909
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
910
        batcher.add_key(('key1',))
911
        batcher.add_key(('key2',))
912
        factories = list(batcher.yield_factories(full_flush=True))
913
        self.assertLength(2, factories)
914
        keys = [f.key for f in factories]
915
        kinds = [f.storage_kind for f in factories]
916
        self.assertEqual([('key1',), ('key2',)], keys)
917
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
918
919
    def test_yield_factories_flushing(self):
920
        """yield_factories holds back on yielding results from the final block
921
        unless passed full_flush=True.
922
        """
923
        fake_block = groupcompress.GroupCompressBlock()
924
        read_memo = ('fake index', 100, 50)
925
        gcvf = StubGCVF()
926
        gcvf._group_cache[read_memo] = fake_block
927
        locations = {
928
            ('key',): (read_memo + (None, None), None, None, None)}
929
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
930
        batcher.add_key(('key',))
931
        self.assertEqual([], list(batcher.yield_factories()))
932
        factories = list(batcher.yield_factories(full_flush=True))
933
        self.assertLength(1, factories)
934
        self.assertEqual(('key',), factories[0].key)
935
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
936
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
937
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
938
class TestLazyGroupCompress(tests.TestCaseWithTransport):
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
939
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
940
    _texts = {
941
        ('key1',): "this is a text\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
942
                   "with a reasonable amount of compressible bytes\n"
943
                   "which can be shared between various other texts\n",
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
944
        ('key2',): "another text\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
945
                   "with a reasonable amount of compressible bytes\n"
946
                   "which can be shared between various other texts\n",
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
947
        ('key3',): "yet another text which won't be extracted\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
948
                   "with a reasonable amount of compressible bytes\n"
949
                   "which can be shared between various other texts\n",
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
950
        ('key4',): "this will be extracted\n"
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
951
                   "but references most of its bytes from\n"
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
952
                   "yet another text which won't be extracted\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
953
                   "with a reasonable amount of compressible bytes\n"
954
                   "which can be shared between various other texts\n",
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
955
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
956
    def make_block(self, key_to_text):
957
        """Create a GroupCompressBlock, filling it with the given texts."""
958
        compressor = groupcompress.GroupCompressor()
959
        start = 0
960
        for key in sorted(key_to_text):
961
            compressor.compress(key, key_to_text[key], None)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
962
        locs = dict((key, (start, end)) for key, (start, _, end, _)
963
                    in compressor.labels_deltas.iteritems())
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
964
        block = compressor.flush()
965
        raw_bytes = block.to_bytes()
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
966
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
967
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
968
    def add_key_to_manager(self, key, locations, block, manager):
969
        start, end = locations[key]
970
        manager.add_factory(key, (), start, end)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
971
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
972
    def make_block_and_full_manager(self, texts):
973
        locations, block = self.make_block(texts)
974
        manager = groupcompress._LazyGroupContentManager(block)
975
        for key in sorted(texts):
976
            self.add_key_to_manager(key, locations, block, manager)
977
        return block, manager
978
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
979
    def test_get_fulltexts(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
980
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
981
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
982
        self.add_key_to_manager(('key1',), locations, block, manager)
983
        self.add_key_to_manager(('key2',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
984
        result_order = []
985
        for record in manager.get_record_stream():
986
            result_order.append(record.key)
987
            text = self._texts[record.key]
988
            self.assertEqual(text, record.get_bytes_as('fulltext'))
989
        self.assertEqual([('key1',), ('key2',)], result_order)
990
991
        # If we build the manager in the opposite order, we should get them
992
        # back in the opposite order
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
993
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
994
        self.add_key_to_manager(('key2',), locations, block, manager)
995
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
996
        result_order = []
997
        for record in manager.get_record_stream():
998
            result_order.append(record.key)
999
            text = self._texts[record.key]
1000
            self.assertEqual(text, record.get_bytes_as('fulltext'))
1001
        self.assertEqual([('key2',), ('key1',)], result_order)
1002
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1003
    def test__wire_bytes_no_keys(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1004
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1005
        manager = groupcompress._LazyGroupContentManager(block)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1006
        wire_bytes = manager._wire_bytes()
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1007
        block_length = len(block.to_bytes())
3735.32.24 by John Arbash Meinel
_wire_bytes() now strips groups as necessary, as does _insert_record_stream
1008
        # We should have triggered a strip, since we aren't using any content
1009
        stripped_block = manager._block.to_bytes()
1010
        self.assertTrue(block_length > len(stripped_block))
1011
        empty_z_header = zlib.compress('')
1012
        self.assertEqual('groupcompress-block\n'
1013
                         '8\n' # len(compress(''))
1014
                         '0\n' # len('')
1015
                         '%d\n'# compressed block len
1016
                         '%s'  # zheader
1017
                         '%s'  # block
1018
                         % (len(stripped_block), empty_z_header,
1019
                            stripped_block),
1020
                         wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1021
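As an aside, the framing asserted above (and picked apart by hand in the next
test) can be summarised as a standalone sketch; the function name is invented
here:

    import zlib

    def parse_gc_wire_bytes(wire_bytes):
        # Storage kind plus three decimal lengths, newline separated,
        # followed by the compressed header and then the block itself.
        kind, z_len, h_len, b_len, rest = wire_bytes.split('\n', 4)
        z_len, h_len, b_len = int(z_len), int(h_len), int(b_len)
        z_header, block = rest[:z_len], rest[z_len:]
        header = zlib.decompress(z_header)
        assert len(header) == h_len and len(block) == b_len
        return kind, header, block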
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
1022
    def test__wire_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1023
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1024
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1025
        self.add_key_to_manager(('key1',), locations, block, manager)
1026
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1027
        block_bytes = block.to_bytes()
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1028
        wire_bytes = manager._wire_bytes()
1029
        (storage_kind, z_header_len, header_len,
1030
         block_len, rest) = wire_bytes.split('\n', 4)
1031
        z_header_len = int(z_header_len)
1032
        header_len = int(header_len)
1033
        block_len = int(block_len)
1034
        self.assertEqual('groupcompress-block', storage_kind)
4665.3.8 by John Arbash Meinel
Of course, when you change the content, it can effect the stored wire bytes slightly.
1035
        self.assertEqual(34, z_header_len)
1036
        self.assertEqual(26, header_len)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1037
        self.assertEqual(len(block_bytes), block_len)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1038
        z_header = rest[:z_header_len]
1039
        header = zlib.decompress(z_header)
1040
        self.assertEqual(header_len, len(header))
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1041
        entry1 = locations[('key1',)]
1042
        entry4 = locations[('key4',)]
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1043
        self.assertEqualDiff('key1\n'
1044
                             '\n'  # no parents
1045
                             '%d\n' # start offset
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1046
                             '%d\n' # end offset
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1047
                             'key4\n'
1048
                             '\n'
1049
                             '%d\n'
1050
                             '%d\n'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1051
                             % (entry1[0], entry1[1],
1052
                                entry4[0], entry4[1]),
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1053
                            header)
1054
        z_block = rest[z_header_len:]
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1055
        self.assertEqual(block_bytes, z_block)
1056
1057
    def test_from_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1058
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1059
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1060
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1062
        wire_bytes = manager._wire_bytes()
1063
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
3735.32.18 by John Arbash Meinel
We now support generating a network stream.
1064
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1065
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1066
        self.assertEqual(2, len(manager._factories))
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1067
        self.assertEqual(block._z_content, manager._block._z_content)
1068
        result_order = []
1069
        for record in manager.get_record_stream():
1070
            result_order.append(record.key)
1071
            text = self._texts[record.key]
1072
            self.assertEqual(text, record.get_bytes_as('fulltext'))
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1073
        self.assertEqual([('key1',), ('key4',)], result_order)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1074
1075
    def test__check_rebuild_no_changes(self):
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1076
        block, manager = self.make_block_and_full_manager(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1077
        manager._check_rebuild_block()
1078
        self.assertIs(block, manager._block)
1079
1080
    def test__check_rebuild_only_one(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1081
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1082
        manager = groupcompress._LazyGroupContentManager(block)
1083
        # Request just the first key, which should trigger a 'strip' action
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1084
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1085
        manager._check_rebuild_block()
1086
        self.assertIsNot(block, manager._block)
1087
        self.assertTrue(block._content_length > manager._block._content_length)
1088
        # We should still be able to get the content out of this block, though
1089
        # it should only have 1 entry
1090
        for record in manager.get_record_stream():
1091
            self.assertEqual(('key1',), record.key)
1092
            self.assertEqual(self._texts[record.key],
1093
                             record.get_bytes_as('fulltext'))
1094
1095
    def test__check_rebuild_middle(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1096
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1097
        manager = groupcompress._LazyGroupContentManager(block)
1098
        # Requesting a small key in the middle should trigger a 'rebuild'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1099
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1100
        manager._check_rebuild_block()
1101
        self.assertIsNot(block, manager._block)
1102
        self.assertTrue(block._content_length > manager._block._content_length)
1103
        for record in manager.get_record_stream():
1104
            self.assertEqual(('key4',), record.key)
1105
            self.assertEqual(self._texts[record.key],
1106
                             record.get_bytes_as('fulltext'))
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1107
5755.2.5 by John Arbash Meinel
Expose the setting up the stack.
1108
    def test_manager_default_max_entries_per_source(self):
1109
        locations, old_block = self.make_block(self._texts)
1110
        manager = groupcompress._LazyGroupContentManager(old_block)
1111
        gcvf = groupcompress.GroupCompressVersionedFiles
1112
        # It doesn't greedily evaluate _max_entries_per_source
1113
        self.assertIs(None, manager._max_entries_per_source)
1114
        self.assertEqual(gcvf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
1115
                         manager._get_max_entries_per_source())
1116
1117
    def test_manager_custom_max_entries_per_source(self):
1118
        locations, old_block = self.make_block(self._texts)
1119
        called = []
1120
        def max_entries():
1121
            called.append('called')
1122
            return 10
1123
        manager = groupcompress._LazyGroupContentManager(old_block,
1124
            get_max_entries_per_source=max_entries)
1125
        gcvf = groupcompress.GroupCompressVersionedFiles
1126
        # It doesn't greedily evaluate _max_entries_per_source
1127
        self.assertIs(None, manager._max_entries_per_source)
1128
        self.assertEqual(10, manager._get_max_entries_per_source())
1129
        self.assertEqual(10, manager._get_max_entries_per_source())
1130
        self.assertEqual(10, manager._max_entries_per_source)
1131
        # Only called 1 time
1132
        self.assertEqual(['called'], called)
1133
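The behaviour verified here is plain call-once memoisation of a callback; in
isolation the pattern looks like this (generic sketch, names invented):

    class LazyValue(object):
        """Compute a value on first use, then cache it."""

        def __init__(self, compute):
            self._compute = compute
            self._value = None

        def get(self):
            if self._value is None:
                self._value = self._compute()  # runs at most once
            return self._value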
1134
    def test__rebuild_handles_max_entries_per_source(self):
1135
        locations, old_block = self.make_block(self._texts)
1136
        manager = groupcompress._LazyGroupContentManager(old_block,
1137
            get_max_entries_per_source=lambda: 2)
1138
        gc = manager._make_group_compressor()
1139
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
1140
            self.assertEqual(2, gc._delta_index._max_entries_per_source)
1141
        self.add_key_to_manager(('key3',), locations, old_block, manager)
1142
        self.add_key_to_manager(('key4',), locations, old_block, manager)
1143
        action, last_byte, total_bytes = manager._check_rebuild_action()
1144
        self.assertEqual('rebuild', action)
1145
        manager._rebuild_block()
1146
        new_block = manager._block
1147
        self.assertIsNot(old_block, new_block)
1148
        # Because of the new max_entries_per_source, we do a poor job of
1149
        # rebuilding. This is a side-effect of the change, but at least it does
1150
        # show the setting had an effect.
1151
        self.assertTrue(old_block._content_length < new_block._content_length)
1152
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1153
    def test_check_is_well_utilized_all_keys(self):
1154
        block, manager = self.make_block_and_full_manager(self._texts)
1155
        self.assertFalse(manager.check_is_well_utilized())
1156
        # Though we can fake it by changing the recommended minimum size
1157
        manager._full_enough_block_size = block._content_length
1158
        self.assertTrue(manager.check_is_well_utilized())
1159
        # Setting it just above causes it to fail
1160
        manager._full_enough_block_size = block._content_length + 1
1161
        self.assertFalse(manager.check_is_well_utilized())
1162
        # Setting the mixed-block size doesn't do anything, because the content
1163
        # is considered to not be 'mixed'
1164
        manager._full_enough_mixed_block_size = block._content_length
1165
        self.assertFalse(manager.check_is_well_utilized())
1166
1167
    def test_check_is_well_utilized_mixed_keys(self):
1168
        texts = {}
1169
        f1k1 = ('f1', 'k1')
1170
        f1k2 = ('f1', 'k2')
1171
        f2k1 = ('f2', 'k1')
1172
        f2k2 = ('f2', 'k2')
1173
        texts[f1k1] = self._texts[('key1',)]
1174
        texts[f1k2] = self._texts[('key2',)]
1175
        texts[f2k1] = self._texts[('key3',)]
1176
        texts[f2k2] = self._texts[('key4',)]
1177
        block, manager = self.make_block_and_full_manager(texts)
1178
        self.assertFalse(manager.check_is_well_utilized())
1179
        manager._full_enough_block_size = block._content_length
1180
        self.assertTrue(manager.check_is_well_utilized())
1181
        manager._full_enough_block_size = block._content_length + 1
1182
        self.assertFalse(manager.check_is_well_utilized())
1183
        manager._full_enough_mixed_block_size = block._content_length
1184
        self.assertTrue(manager.check_is_well_utilized())
1185
1186
    def test_check_is_well_utilized_partial_use(self):
1187
        locations, block = self.make_block(self._texts)
1188
        manager = groupcompress._LazyGroupContentManager(block)
1189
        manager._full_enough_block_size = block._content_length
1190
        self.add_key_to_manager(('key1',), locations, block, manager)
1191
        self.add_key_to_manager(('key2',), locations, block, manager)
1192
        # Just using the content from key1 and 2 is not enough to be considered
1193
        # 'complete'
1194
        self.assertFalse(manager.check_is_well_utilized())
1195
        # However if we add key4, then we have enough, as we only require 75%
1196
        # consumption
1197
        self.add_key_to_manager(('key4',), locations, block, manager)
1198
        self.assertTrue(manager.check_is_well_utilized())
5365.4.1 by John Arbash Meinel
Find a case where we are wasting a bit of memory.
1199
1200
1201
class Test_GCBuildDetails(tests.TestCase):
1202
1203
    def test_acts_like_tuple(self):
1204
        # _GCBuildDetails inlines some of the data that used to be spread out
1205
        # across a bunch of tuples
1206
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1207
            ('INDEX', 10, 20, 0, 5))
1208
        self.assertEqual(4, len(bd))
1209
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
1210
        self.assertEqual(None, bd[1]) # Compression Parent is always None
1211
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
1212
        self.assertEqual(('group', None), bd[3]) # Record details
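        # Read positionally, bd is therefore equivalent to the old 4-tuple
        # (index_memo, compression_parent, parents, record_details).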
1213
1214
    def test__repr__(self):
1215
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1216
            ('INDEX', 10, 20, 0, 5))
1217
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
1218
                         " (('parent1',), ('parent2',)))",
1219
                         repr(bd))
1220