/brz/remove-bazaar : contents of tests/test__groupcompress

: (revision 0.18.26)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2008 Canonical Limited.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
# 

"""Tests for the pyrex extension of groupcompress"""

from bzrlib import tests

from bzrlib.plugins.groupcompress import groupcompress


class _CompiledGroupCompress(tests.Feature):

    def _probe(self):
        try:
            import bzrlib.plugins.groupcompress._groupcompress_c
        except ImportError:
            return False
        else:
            return True

    def feature_name(self):
        return 'bzrlib.plugins.groupcompress._groupcompress_c'

CompiledGroupCompress = _CompiledGroupCompress()


class TestCompiledEquivalenceTable(tests.TestCase):
    """Direct tests for the compiled Equivalence Table."""

    _tests_need_features = [CompiledGroupCompress]

    # These tests assume that hash(int) == int
    # If that ever changes, we can simply change this code to use a custom
    # class that has precomputed values returned from __hash__.

    def setUp(self):
        super(TestCompiledEquivalenceTable, self).setUp()
        from bzrlib.plugins.groupcompress import _groupcompress_c
        self._gc_module = _groupcompress_c

    def test_minimum_hash_size(self):
        eq = self._gc_module.EquivalenceTable([])
        # We request at least 33% free space in the hash (to make collisions
        # more bearable)
        self.assertEqual(1024, eq._py_compute_minimum_hash_size(683))
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(684))
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(1000))
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(1024))

    def test_recommended_hash_size(self):
        eq = self._gc_module.EquivalenceTable([])
        # We always recommend a minimum of 8k
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(10))
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(1000))
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(2000))
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(4000))

        # And we recommend at least 50% free slots
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(4096))
        self.assertEqual(16384, eq._py_compute_recommended_hash_size(4097))

    def test__raw_lines(self):
        eq = self._gc_module.EquivalenceTable([1, 2, 3])
        self.assertEqual([(1, 1, 1, -1), (2, 2, 2, -1), (3, 3, 3, -1)],
                         eq._inspect_left_lines())

    def test_build_hash(self):
        eq = self._gc_module.EquivalenceTable([1, 2, 3])
        # (size, [(offset, head_offset_in_lines, count)])
        self.assertEqual((8192, [(1, 0, 1), (2, 1, 1), (3, 2, 1)]),
                         eq._inspect_hash_table())

    def test_build_hash_with_duplicates(self):
        eq = self._gc_module.EquivalenceTable([1, 2, 4, 0, 1, 4, 2, 4])
        self.assertEqual([
            (1, 1, 1, 4),
            (2, 2, 2, 6),
            (4, 4, 4, 5),
            (0, 0, 0, -1),
            (1, 1, 1, -1),
            (4, 4, 4, 7),
            (2, 2, 2, -1),
            (4, 4, 4, -1),
            ], eq._inspect_left_lines())
        # (hash_offset, head_offset_in_lines, count)
        self.assertEqual((8192, [
            (0, 3, 1),
            (1, 0, 2),
            (2, 1, 2),
            (4, 2, 3),
            ]), eq._inspect_hash_table())

    def test_build_hash_table_with_wraparound(self):
        eq = self._gc_module.EquivalenceTable([1, 2+8192])
        self.assertEqual([
            (1, 1, 1, -1),
            (8194, 8194, 2, -1),
            ], eq._inspect_left_lines())
        self.assertEqual((8192, [
            (1, 0, 1),
            (2, 1, 1),
            ]), eq._inspect_hash_table())

    def test_build_hash_table_with_collisions(self):
        # We build up backwards, so # 2+8192 will wrap around to 2, and take
        # its spot because the 2 offset is taken, then the real '2' will get
        # bumped to 3, which will bump 3 into 4.  then when we have 5, it will
        # be fine, but the 8192+5 will get bumped to 6
        eq = self._gc_module.EquivalenceTable([1, 5+8192, 5, 3, 2, 2+8192])
        self.assertEqual([
            (1, 1, 1, -1),
            (8197, 8197, 6, -1),
            (5, 5, 5, -1),
            (3, 3, 4, -1),
            (2, 2, 3, -1),
            (8194, 8194, 2, -1),
            ], eq._inspect_left_lines())
        self.assertEqual((8192, [
            (1, 0, 1),
            (2, 5, 1),
            (3, 4, 1),
            (4, 3, 1),
            (5, 2, 1),
            (6, 1, 1),
            ]), eq._inspect_hash_table())

    def test_build_hash_table_with_wrapped_collisions(self):
        eq = self._gc_module.EquivalenceTable([0, 8191+8192, 8191])
        self.assertEqual([
            (0, 0, 1, -1),
            (16383, 16383, 0, -1),
            (8191, 8191, 8191, -1),
            ], eq._inspect_left_lines())
        self.assertEqual((8192, [
            (0, 1, 1),
            (1, 0, 1),
            (8191, 2, 1),
            ]), eq._inspect_hash_table())

    def test_build_hash_table_with_strings(self):
        eq = self._gc_module.EquivalenceTable(['a', 'b', 'c', 'b'])
        self.assertEqual([
            ('a', hash('a'), hash('a') & 8191, -1),
            ('b', hash('b'), hash('b') & 8191, 3),
            ('c', hash('c'), hash('c') & 8191, -1),
            ('b', hash('b'), hash('b') & 8191, -1),
            ], eq._inspect_left_lines())
        self.assertEqual((8192, sorted([
            (hash('a') & 8191, 0, 1),
            (hash('b') & 8191, 1, 2),
            (hash('c') & 8191, 2, 1),
            ])), eq._inspect_hash_table())

    def test_with_unhashable(self):
        self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
                [['unhashable', 'list'], 'foo'])
        self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
                ('foo', ['unhashable', 'list']))

    def test_extend_lines(self):
        eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
        eq.extend_lines([30, 20, 40], [True, True, True])
        self.assertEqual([
            (10, 10, 10, -1),
            (20, 20, 20, 3),
            (30, 30, 30, 4),
            (20, 20, 20, 5),
            (30, 30, 30, -1),
            (20, 20, 20, -1),
            (40, 40, 40, -1),
            ], eq._inspect_left_lines())
        # (hash_offset, head_offset_in_lines, count)
        self.assertEqual((8192, [
            (10, 0, 1),
            (20, 1, 3),
            (30, 2, 2),
            (40, 6, 1),
            ]), eq._inspect_hash_table())

    def test_extend_lines_ignored(self):
        eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
        eq.extend_lines([30, 20, 40], [True, False, False])
        self.assertEqual([
            (10, 10, 10, -1),
            (20, 20, 20, 3),
            (30, 30, 30, 4),
            (20, 20, 20, -1),
            (30, 30, 30, -1),
            (20, 20, -1, -1),
            (40, 40, -1, -1),
            ], eq._inspect_left_lines())
        # (hash_offset, head_offset_in_lines, count)
        self.assertEqual((8192, [
            (10, 0, 1),
            (20, 1, 2),
            (30, 2, 2),
            ]), eq._inspect_hash_table())


class TestGetLongestMatch(tests.TestCase):

    def setUp(self):
        super(TestGetLongestMatch, self).setUp()
        self._longest_match = groupcompress._get_longest_match
        self._eq_class = groupcompress.GroupCompressor._equivalence_table_class

    def assertLongestMatch(self, block, end_pos, matching_locs,
                           eq, start_pos, max_len, known_locs):
        """Check that _get_longest_match gives correct results."""
        self.assertEqual((block, end_pos, matching_locs),
                         self._longest_match(eq, start_pos, max_len,
                                             known_locs))

    def test_all_match(self):
        eq = self._eq_class(['a', 'b', 'c'])
        eq.set_right_lines(['a', 'b', 'c'])
        self.assertLongestMatch((0, 0, 3), 3, None,
                                eq, 0, 3, None)
        self.assertLongestMatch((0, 0, 3), 3, None,
                                 eq, 0, 3, [0])
        self.assertLongestMatch((1, 1, 2), 3, None,
                                eq, 1, 3, None)
        self.assertLongestMatch((1, 1, 2), 3, None,
                                eq, 1, 3, [1])

    def test_no_match(self):
        eq = self._eq_class(['a', 'b', 'c'])
        eq.set_right_lines(['d', 'e', 'f'])
        self.assertLongestMatch(None, 1, None,
                                eq, 0, 3, None)
        self.assertLongestMatch(None, 2, None,
                                eq, 1, 3, None)
        self.assertLongestMatch(None, 3, None,
                                eq, 2, 3, None)

    def test_next_match(self):
        eq = self._eq_class(['a', 'b', 'c'])
        eq.set_right_lines(['a', 'c'])
        self.assertLongestMatch((0, 0, 1), 1, [2],
                                eq, 0, 2, None)
        self.assertLongestMatch((2, 1, 1), 2, None,
                                eq, 1, 2, None)


class TestCompiledGetLongestMatch(TestGetLongestMatch):

    _tests_need_features = [CompiledGroupCompress]

    def setUp(self):
        super(TestGetLongestMatch, self).setUp()
        from bzrlib.plugins.groupcompress import _groupcompress_c
        self._longest_match = _groupcompress_c._get_longest_match
        self._eq_class = _groupcompress_c.EquivalenceTable


0.18.15 by John Arbash Meinel Start writing tests directly for the compiled class	1	# Copyright (C) 2008 Canonical Limited.
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License version 2 as published
	5	# by the Free Software Foundation.
	6	#
	7	# This program is distributed in the hope that it will be useful,
	8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	# GNU General Public License for more details.
	11	#
	12	# You should have received a copy of the GNU General Public License
	13	# along with this program; if not, write to the Free Software
	14	# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
	15	#
	16
	17	"""Tests for the pyrex extension of groupcompress"""
	18
	19	from bzrlib import tests
	20
0.18.26 by John Arbash Meinel Start with a copy implementation of the _get_longest_match function.	21	from bzrlib.plugins.groupcompress import groupcompress
	22
0.18.15 by John Arbash Meinel Start writing tests directly for the compiled class	23
	24	class _CompiledGroupCompress(tests.Feature):
	25
	26	def _probe(self):
	27	try:
	28	import bzrlib.plugins.groupcompress._groupcompress_c
	29	except ImportError:
	30	return False
	31	else:
	32	return True
	33
	34	def feature_name(self):
	35	return 'bzrlib.plugins.groupcompress._groupcompress_c'
	36
	37	CompiledGroupCompress = _CompiledGroupCompress()
	38
	39
	40	class TestCompiledEquivalenceTable(tests.TestCase):
	41	"""Direct tests for the compiled Equivalence Table."""
	42
	43	_tests_need_features = [CompiledGroupCompress]
	44
0.18.18 by John Arbash Meinel Start actually storing matches in the hash table, and testing the result.	45	# These tests assume that hash(int) == int
	46	# If that ever changes, we can simply change this code to use a custom
	47	# class that has precomputed values returned from __hash__.
	48
0.18.15 by John Arbash Meinel Start writing tests directly for the compiled class	49	def setUp(self):
	50	super(TestCompiledEquivalenceTable, self).setUp()
	51	from bzrlib.plugins.groupcompress import _groupcompress_c
	52	self._gc_module = _groupcompress_c
	53
	54	def test_minimum_hash_size(self):
	55	eq = self._gc_module.EquivalenceTable([])
0.18.16 by John Arbash Meinel Test the recommended versus minimum hash table sizes.	56	# We request at least 33% free space in the hash (to make collisions
	57	# more bearable)
	58	self.assertEqual(1024, eq._py_compute_minimum_hash_size(683))
	59	self.assertEqual(2048, eq._py_compute_minimum_hash_size(684))
	60	self.assertEqual(2048, eq._py_compute_minimum_hash_size(1000))
	61	self.assertEqual(2048, eq._py_compute_minimum_hash_size(1024))
	62
	63	def test_recommended_hash_size(self):
	64	eq = self._gc_module.EquivalenceTable([])
	65	# We always recommend a minimum of 8k
	66	self.assertEqual(8192, eq._py_compute_recommended_hash_size(10))
	67	self.assertEqual(8192, eq._py_compute_recommended_hash_size(1000))
	68	self.assertEqual(8192, eq._py_compute_recommended_hash_size(2000))
	69	self.assertEqual(8192, eq._py_compute_recommended_hash_size(4000))
	70
	71	# And we recommend at least 50% free slots
	72	self.assertEqual(8192, eq._py_compute_recommended_hash_size(4096))
	73	self.assertEqual(16384, eq._py_compute_recommended_hash_size(4097))
0.18.17 by John Arbash Meinel We now build the appropriate hash table entries.	74
	75	def test__raw_lines(self):
	76	eq = self._gc_module.EquivalenceTable([1, 2, 3])
0.18.18 by John Arbash Meinel Start actually storing matches in the hash table, and testing the result.	77	self.assertEqual([(1, 1, 1, -1), (2, 2, 2, -1), (3, 3, 3, -1)],
0.18.17 by John Arbash Meinel We now build the appropriate hash table entries.	78	eq._inspect_left_lines())
	79
	80	def test_build_hash(self):
	81	eq = self._gc_module.EquivalenceTable([1, 2, 3])
	82	# (size, [(offset, head_offset_in_lines, count)])
	83	self.assertEqual((8192, [(1, 0, 1), (2, 1, 1), (3, 2, 1)]),
	84	eq._inspect_hash_table())
0.18.18 by John Arbash Meinel Start actually storing matches in the hash table, and testing the result.	85
	86	def test_build_hash_with_duplicates(self):
	87	eq = self._gc_module.EquivalenceTable([1, 2, 4, 0, 1, 4, 2, 4])
	88	self.assertEqual([
	89	(1, 1, 1, 4),
	90	(2, 2, 2, 6),
	91	(4, 4, 4, 5),
	92	(0, 0, 0, -1),
	93	(1, 1, 1, -1),
	94	(4, 4, 4, 7),
	95	(2, 2, 2, -1),
	96	(4, 4, 4, -1),
	97	], eq._inspect_left_lines())
	98	# (hash_offset, head_offset_in_lines, count)
	99	self.assertEqual((8192, [
	100	(0, 3, 1),
	101	(1, 0, 2),
	102	(2, 1, 2),
	103	(4, 2, 3),
	104	]), eq._inspect_hash_table())
0.18.19 by John Arbash Meinel Do some more testing about what happens when you get hash collisions, etc.	105
	106	def test_build_hash_table_with_wraparound(self):
	107	eq = self._gc_module.EquivalenceTable([1, 2+8192])
	108	self.assertEqual([
	109	(1, 1, 1, -1),
	110	(8194, 8194, 2, -1),
	111	], eq._inspect_left_lines())
	112	self.assertEqual((8192, [
	113	(1, 0, 1),
	114	(2, 1, 1),
	115	]), eq._inspect_hash_table())
	116
	117	def test_build_hash_table_with_collisions(self):
	118	# We build up backwards, so # 2+8192 will wrap around to 2, and take
	119	# its spot because the 2 offset is taken, then the real '2' will get
	120	# bumped to 3, which will bump 3 into 4. then when we have 5, it will
	121	# be fine, but the 8192+5 will get bumped to 6
	122	eq = self._gc_module.EquivalenceTable([1, 5+8192, 5, 3, 2, 2+8192])
	123	self.assertEqual([
	124	(1, 1, 1, -1),
	125	(8197, 8197, 6, -1),
	126	(5, 5, 5, -1),
	127	(3, 3, 4, -1),
	128	(2, 2, 3, -1),
	129	(8194, 8194, 2, -1),
	130	], eq._inspect_left_lines())
	131	self.assertEqual((8192, [
	132	(1, 0, 1),
	133	(2, 5, 1),
	134	(3, 4, 1),
	135	(4, 3, 1),
	136	(5, 2, 1),
	137	(6, 1, 1),
	138	]), eq._inspect_hash_table())
0.18.20 by John Arbash Meinel Test the results with real strings rather than just integers	139
0.18.21 by John Arbash Meinel Add support for not including certain lines in the hashtable.	140	def test_build_hash_table_with_wrapped_collisions(self):
	141	eq = self._gc_module.EquivalenceTable([0, 8191+8192, 8191])
	142	self.assertEqual([
	143	(0, 0, 1, -1),
	144	(16383, 16383, 0, -1),
	145	(8191, 8191, 8191, -1),
	146	], eq._inspect_left_lines())
	147	self.assertEqual((8192, [
	148	(0, 1, 1),
	149	(1, 0, 1),
	150	(8191, 2, 1),
	151	]), eq._inspect_hash_table())
	152
0.18.20 by John Arbash Meinel Test the results with real strings rather than just integers	153	def test_build_hash_table_with_strings(self):
	154	eq = self._gc_module.EquivalenceTable(['a', 'b', 'c', 'b'])
	155	self.assertEqual([
	156	('a', hash('a'), hash('a') & 8191, -1),
	157	('b', hash('b'), hash('b') & 8191, 3),
	158	('c', hash('c'), hash('c') & 8191, -1),
	159	('b', hash('b'), hash('b') & 8191, -1),
	160	], eq._inspect_left_lines())
	161	self.assertEqual((8192, sorted([
	162	(hash('a') & 8191, 0, 1),
	163	(hash('b') & 8191, 1, 2),
	164	(hash('c') & 8191, 2, 1),
	165	])), eq._inspect_hash_table())
0.18.21 by John Arbash Meinel Add support for not including certain lines in the hashtable.	166
	167	def test_with_unhashable(self):
	168	self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
	169	[['unhashable', 'list'], 'foo'])
	170	self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
	171	('foo', ['unhashable', 'list']))
	172
	173	def test_extend_lines(self):
	174	eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
	175	eq.extend_lines([30, 20, 40], [True, True, True])
	176	self.assertEqual([
	177	(10, 10, 10, -1),
	178	(20, 20, 20, 3),
	179	(30, 30, 30, 4),
	180	(20, 20, 20, 5),
	181	(30, 30, 30, -1),
	182	(20, 20, 20, -1),
	183	(40, 40, 40, -1),
	184	], eq._inspect_left_lines())
	185	# (hash_offset, head_offset_in_lines, count)
	186	self.assertEqual((8192, [
	187	(10, 0, 1),
	188	(20, 1, 3),
	189	(30, 2, 2),
	190	(40, 6, 1),
	191	]), eq._inspect_hash_table())
	192
	193	def test_extend_lines_ignored(self):
	194	eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
	195	eq.extend_lines([30, 20, 40], [True, False, False])
	196	self.assertEqual([
	197	(10, 10, 10, -1),
	198	(20, 20, 20, 3),
	199	(30, 30, 30, 4),
	200	(20, 20, 20, -1),
	201	(30, 30, 30, -1),
	202	(20, 20, -1, -1),
	203	(40, 40, -1, -1),
	204	], eq._inspect_left_lines())
	205	# (hash_offset, head_offset_in_lines, count)
	206	self.assertEqual((8192, [
	207	(10, 0, 1),
	208	(20, 1, 2),
	209	(30, 2, 2),
	210	]), eq._inspect_hash_table())
0.18.26 by John Arbash Meinel Start with a copy implementation of the _get_longest_match function.	211
	212
	213	class TestGetLongestMatch(tests.TestCase):
	214
	215	def setUp(self):
	216	super(TestGetLongestMatch, self).setUp()
	217	self._longest_match = groupcompress._get_longest_match
	218	self._eq_class = groupcompress.GroupCompressor._equivalence_table_class
	219
	220	def assertLongestMatch(self, block, end_pos, matching_locs,
	221	eq, start_pos, max_len, known_locs):
	222	"""Check that _get_longest_match gives correct results."""
	223	self.assertEqual((block, end_pos, matching_locs),
	224	self._longest_match(eq, start_pos, max_len,
	225	known_locs))
	226
	227	def test_all_match(self):
	228	eq = self._eq_class(['a', 'b', 'c'])
	229	eq.set_right_lines(['a', 'b', 'c'])
	230	self.assertLongestMatch((0, 0, 3), 3, None,
	231	eq, 0, 3, None)
	232	self.assertLongestMatch((0, 0, 3), 3, None,
	233	eq, 0, 3, [0])
	234	self.assertLongestMatch((1, 1, 2), 3, None,
	235	eq, 1, 3, None)
	236	self.assertLongestMatch((1, 1, 2), 3, None,
	237	eq, 1, 3, [1])
	238
	239	def test_no_match(self):
	240	eq = self._eq_class(['a', 'b', 'c'])
	241	eq.set_right_lines(['d', 'e', 'f'])
	242	self.assertLongestMatch(None, 1, None,
	243	eq, 0, 3, None)
	244	self.assertLongestMatch(None, 2, None,
	245	eq, 1, 3, None)
	246	self.assertLongestMatch(None, 3, None,
	247	eq, 2, 3, None)
	248
	249	def test_next_match(self):
	250	eq = self._eq_class(['a', 'b', 'c'])
	251	eq.set_right_lines(['a', 'c'])
	252	self.assertLongestMatch((0, 0, 1), 1, [2],
	253	eq, 0, 2, None)
	254	self.assertLongestMatch((2, 1, 1), 2, None,
	255	eq, 1, 2, None)
	256
	257
	258	class TestCompiledGetLongestMatch(TestGetLongestMatch):
	259
	260	_tests_need_features = [CompiledGroupCompress]
	261
	262	def setUp(self):
	263	super(TestGetLongestMatch, self).setUp()
	264	from bzrlib.plugins.groupcompress import _groupcompress_c
	265	self._longest_match = _groupcompress_c._get_longest_match
	266	self._eq_class = _groupcompress_c.EquivalenceTable
	267