/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
1
# Copyright (C) 2008 Canonical Limited.
2
# 
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
6
# 
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
# GNU General Public License for more details.
11
# 
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
15
# 
16
17
"""Tests for the pyrex extension of groupcompress"""
18
19
from bzrlib import tests
20
0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
21
from bzrlib.plugins.groupcompress import groupcompress
22
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
23
24
class _CompiledGroupCompress(tests.Feature):
25
26
    def _probe(self):
27
        try:
28
            import bzrlib.plugins.groupcompress._groupcompress_c
29
        except ImportError:
30
            return False
31
        else:
32
            return True
33
34
    def feature_name(self):
35
        return 'bzrlib.plugins.groupcompress._groupcompress_c'
36
37
CompiledGroupCompress = _CompiledGroupCompress()
38
39
40
class TestCompiledEquivalenceTable(tests.TestCase):
41
    """Direct tests for the compiled Equivalence Table."""
42
43
    _tests_need_features = [CompiledGroupCompress]
44
0.18.18 by John Arbash Meinel
Start actually storing matches in the hash table, and testing the result.
45
    # These tests assume that hash(int) == int
46
    # If that ever changes, we can simply change this code to use a custom
47
    # class that has precomputed values returned from __hash__.
48
0.18.31 by John Arbash Meinel
We had a small bug when we had to rebuild the hash, as we would forget about the non-indexed entries.
49
50
    # TODO: We need a test that forces a rebuild of the hash array, and makes
51
    #       sure that the lines that should not be indexed *stay* not indexed
52
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
53
    def setUp(self):
54
        super(TestCompiledEquivalenceTable, self).setUp()
55
        from bzrlib.plugins.groupcompress import _groupcompress_c
56
        self._gc_module = _groupcompress_c
57
58
    def test_minimum_hash_size(self):
59
        eq = self._gc_module.EquivalenceTable([])
0.18.32 by John Arbash Meinel
Switch away from += for older versions of pyrex,
60
        # We request at least 50% free space in the hash (to make collisions
0.18.16 by John Arbash Meinel
Test the recommended versus minimum hash table sizes.
61
        # more bearable)
0.18.32 by John Arbash Meinel
Switch away from += for older versions of pyrex,
62
        self.assertEqual(1024, eq._py_compute_minimum_hash_size(512))
63
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(513))
0.18.16 by John Arbash Meinel
Test the recommended versus minimum hash table sizes.
64
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(1000))
65
        self.assertEqual(2048, eq._py_compute_minimum_hash_size(1024))
66
67
    def test_recommended_hash_size(self):
68
        eq = self._gc_module.EquivalenceTable([])
69
        # We always recommend a minimum of 8k
70
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(10))
71
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(1000))
72
        self.assertEqual(8192, eq._py_compute_recommended_hash_size(2000))
0.18.32 by John Arbash Meinel
Switch away from += for older versions of pyrex,
73
        self.assertEqual(16384, eq._py_compute_recommended_hash_size(4000))
0.18.16 by John Arbash Meinel
Test the recommended versus minimum hash table sizes.
74
0.18.32 by John Arbash Meinel
Switch away from += for older versions of pyrex,
75
        # And we recommend at least 75% free slots
76
        self.assertEqual(16384, eq._py_compute_recommended_hash_size(4096))
77
        self.assertEqual(32768, eq._py_compute_recommended_hash_size(4097))
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
78
79
    def test__raw_lines(self):
80
        eq = self._gc_module.EquivalenceTable([1, 2, 3])
0.18.18 by John Arbash Meinel
Start actually storing matches in the hash table, and testing the result.
81
        self.assertEqual([(1, 1, 1, -1), (2, 2, 2, -1), (3, 3, 3, -1)],
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
82
                         eq._inspect_left_lines())
83
84
    def test_build_hash(self):
85
        eq = self._gc_module.EquivalenceTable([1, 2, 3])
86
        # (size, [(offset, head_offset_in_lines, count)])
87
        self.assertEqual((8192, [(1, 0, 1), (2, 1, 1), (3, 2, 1)]),
88
                         eq._inspect_hash_table())
0.18.18 by John Arbash Meinel
Start actually storing matches in the hash table, and testing the result.
89
90
    def test_build_hash_with_duplicates(self):
91
        eq = self._gc_module.EquivalenceTable([1, 2, 4, 0, 1, 4, 2, 4])
92
        self.assertEqual([
93
            (1, 1, 1, 4),
94
            (2, 2, 2, 6),
95
            (4, 4, 4, 5),
96
            (0, 0, 0, -1),
97
            (1, 1, 1, -1),
98
            (4, 4, 4, 7),
99
            (2, 2, 2, -1),
100
            (4, 4, 4, -1),
101
            ], eq._inspect_left_lines())
102
        # (hash_offset, head_offset_in_lines, count)
103
        self.assertEqual((8192, [
104
            (0, 3, 1),
105
            (1, 0, 2),
106
            (2, 1, 2),
107
            (4, 2, 3),
108
            ]), eq._inspect_hash_table())
0.18.19 by John Arbash Meinel
Do some more testing about what happens when you get hash collisions, etc.
109
110
    def test_build_hash_table_with_wraparound(self):
111
        eq = self._gc_module.EquivalenceTable([1, 2+8192])
112
        self.assertEqual([
113
            (1, 1, 1, -1),
114
            (8194, 8194, 2, -1),
115
            ], eq._inspect_left_lines())
116
        self.assertEqual((8192, [
117
            (1, 0, 1),
118
            (2, 1, 1),
119
            ]), eq._inspect_hash_table())
120
121
    def test_build_hash_table_with_collisions(self):
122
        # We build up backwards, so # 2+8192 will wrap around to 2, and take
123
        # its spot because the 2 offset is taken, then the real '2' will get
124
        # bumped to 3, which will bump 3 into 4.  then when we have 5, it will
125
        # be fine, but the 8192+5 will get bumped to 6
126
        eq = self._gc_module.EquivalenceTable([1, 5+8192, 5, 3, 2, 2+8192])
127
        self.assertEqual([
128
            (1, 1, 1, -1),
129
            (8197, 8197, 6, -1),
130
            (5, 5, 5, -1),
131
            (3, 3, 4, -1),
132
            (2, 2, 3, -1),
133
            (8194, 8194, 2, -1),
134
            ], eq._inspect_left_lines())
135
        self.assertEqual((8192, [
136
            (1, 0, 1),
137
            (2, 5, 1),
138
            (3, 4, 1),
139
            (4, 3, 1),
140
            (5, 2, 1),
141
            (6, 1, 1),
142
            ]), eq._inspect_hash_table())
0.18.20 by John Arbash Meinel
Test the results with real strings rather than just integers
143
0.18.21 by John Arbash Meinel
Add support for not including certain lines in the hashtable.
144
    def test_build_hash_table_with_wrapped_collisions(self):
145
        eq = self._gc_module.EquivalenceTable([0, 8191+8192, 8191])
146
        self.assertEqual([
147
            (0, 0, 1, -1),
148
            (16383, 16383, 0, -1),
149
            (8191, 8191, 8191, -1),
150
            ], eq._inspect_left_lines())
151
        self.assertEqual((8192, [
152
            (0, 1, 1),
153
            (1, 0, 1),
154
            (8191, 2, 1),
155
            ]), eq._inspect_hash_table())
156
0.18.20 by John Arbash Meinel
Test the results with real strings rather than just integers
157
    def test_build_hash_table_with_strings(self):
158
        eq = self._gc_module.EquivalenceTable(['a', 'b', 'c', 'b'])
159
        self.assertEqual([
160
            ('a', hash('a'), hash('a') & 8191, -1),
161
            ('b', hash('b'), hash('b') & 8191, 3),
162
            ('c', hash('c'), hash('c') & 8191, -1),
163
            ('b', hash('b'), hash('b') & 8191, -1),
164
            ], eq._inspect_left_lines())
165
        self.assertEqual((8192, sorted([
166
            (hash('a') & 8191, 0, 1),
167
            (hash('b') & 8191, 1, 2),
168
            (hash('c') & 8191, 2, 1),
169
            ])), eq._inspect_hash_table())
0.18.21 by John Arbash Meinel
Add support for not including certain lines in the hashtable.
170
171
    def test_with_unhashable(self):
172
        self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
173
                [['unhashable', 'list'], 'foo'])
174
        self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
175
                ('foo', ['unhashable', 'list']))
176
177
    def test_extend_lines(self):
178
        eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
179
        eq.extend_lines([30, 20, 40], [True, True, True])
180
        self.assertEqual([
181
            (10, 10, 10, -1),
182
            (20, 20, 20, 3),
183
            (30, 30, 30, 4),
184
            (20, 20, 20, 5),
185
            (30, 30, 30, -1),
186
            (20, 20, 20, -1),
187
            (40, 40, 40, -1),
188
            ], eq._inspect_left_lines())
189
        # (hash_offset, head_offset_in_lines, count)
190
        self.assertEqual((8192, [
191
            (10, 0, 1),
192
            (20, 1, 3),
193
            (30, 2, 2),
194
            (40, 6, 1),
195
            ]), eq._inspect_hash_table())
196
197
    def test_extend_lines_ignored(self):
198
        eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
199
        eq.extend_lines([30, 20, 40], [True, False, False])
200
        self.assertEqual([
201
            (10, 10, 10, -1),
202
            (20, 20, 20, 3),
203
            (30, 30, 30, 4),
204
            (20, 20, 20, -1),
205
            (30, 30, 30, -1),
206
            (20, 20, -1, -1),
207
            (40, 40, -1, -1),
208
            ], eq._inspect_left_lines())
209
        # (hash_offset, head_offset_in_lines, count)
210
        self.assertEqual((8192, [
211
            (10, 0, 1),
212
            (20, 1, 2),
213
            (30, 2, 2),
214
            ]), eq._inspect_hash_table())
0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
215
216
217
class TestGetLongestMatch(tests.TestCase):
218
219
    def setUp(self):
220
        super(TestGetLongestMatch, self).setUp()
221
        self._longest_match = groupcompress._get_longest_match
222
        self._eq_class = groupcompress.GroupCompressor._equivalence_table_class
223
224
    def assertLongestMatch(self, block, end_pos, matching_locs,
225
                           eq, start_pos, max_len, known_locs):
226
        """Check that _get_longest_match gives correct results."""
0.18.34 by John Arbash Meinel
Doing the intersection as we go allows us to short-cut a bit more.
227
        the_block, the_end_pos, next_locs = self._longest_match(eq,
228
                                                start_pos, max_len, known_locs)
229
        self.assertEqual(block, the_block)
230
        self.assertEqual(end_pos, the_end_pos)
231
        if next_locs is not None:
232
            self.assertEqual(matching_locs, next_locs)
0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
233
234
    def test_all_match(self):
235
        eq = self._eq_class(['a', 'b', 'c'])
236
        eq.set_right_lines(['a', 'b', 'c'])
237
        self.assertLongestMatch((0, 0, 3), 3, None,
238
                                eq, 0, 3, None)
239
        self.assertLongestMatch((0, 0, 3), 3, None,
240
                                 eq, 0, 3, [0])
241
        self.assertLongestMatch((1, 1, 2), 3, None,
242
                                eq, 1, 3, None)
243
        self.assertLongestMatch((1, 1, 2), 3, None,
244
                                eq, 1, 3, [1])
245
246
    def test_no_match(self):
247
        eq = self._eq_class(['a', 'b', 'c'])
248
        eq.set_right_lines(['d', 'e', 'f'])
249
        self.assertLongestMatch(None, 1, None,
250
                                eq, 0, 3, None)
251
        self.assertLongestMatch(None, 2, None,
252
                                eq, 1, 3, None)
253
        self.assertLongestMatch(None, 3, None,
254
                                eq, 2, 3, None)
255
256
    def test_next_match(self):
257
        eq = self._eq_class(['a', 'b', 'c'])
258
        eq.set_right_lines(['a', 'c'])
259
        self.assertLongestMatch((0, 0, 1), 1, [2],
260
                                eq, 0, 2, None)
261
        self.assertLongestMatch((2, 1, 1), 2, None,
262
                                eq, 1, 2, None)
263
0.18.30 by John Arbash Meinel
Restore a test
264
    def test_lots_of_matches(self):
265
        eq = self._eq_class(['a']*1000)
266
        eq.set_right_lines(['a']*1000)
267
        self.assertLongestMatch((0, 0, 1000), 1000, None,
268
                                eq, 0, 1000, None)
269
        eq = self._eq_class(['a']*1000)
270
        eq.set_right_lines(['a']*2000)
271
        self.assertLongestMatch((0, 0, 1000), 1000, range(1000),
272
                                eq, 0, 2000, None)
273
        eq = self._eq_class(['a']*2000)
274
        eq.set_right_lines(['a']*1000)
275
        self.assertLongestMatch((0, 0, 1000), 1000, None,
276
                                eq, 0, 1000, None)
277
0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
278
279
class TestCompiledGetLongestMatch(TestGetLongestMatch):
280
281
    _tests_need_features = [CompiledGroupCompress]
282
283
    def setUp(self):
284
        super(TestGetLongestMatch, self).setUp()
285
        from bzrlib.plugins.groupcompress import _groupcompress_c
286
        self._longest_match = _groupcompress_c._get_longest_match
287
        self._eq_class = _groupcompress_c.EquivalenceTable
288