/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
1
# groupcompress, a bzr plugin providing new compression logic.
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#

"""Tests for group compression."""
19
20
import zlib
21
22
from bzrlib import tests
0.23.58 by John Arbash Meinel
fix up the failing tests.
23
from bzrlib.osutils import sha_string
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
24
from bzrlib.plugins.groupcompress import errors, groupcompress
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
25
from bzrlib.tests import (
26
    TestCaseWithTransport,
27
    TestScenarioApplier,
28
    adapt_tests,
29
    )
30
from bzrlib.transport import get_transport
31
32
33
def load_tests(standard_tests, module, loader):
    """Multiply the VersionedFiles interface tests over groupcompress.

    Loads the generic VersionedFiles interface tests and adapts them to
    run against a rabin-delta groupcompress pack factory (no graph,
    single-element keys), appending the adapted copies to the standard
    test suite.
    """
    from bzrlib.tests.test_versionedfile import TestVersionedFiles
    interface_suite = loader.loadTestsFromTestCase(TestVersionedFiles)
    # One scenario: groupcompress with the rabin matcher, graph disabled.
    scenario = ('groupcompressrabin-nograph', {
        'cleanup': groupcompress.cleanup_pack_group,
        'factory': groupcompress.make_pack_factory(False, False, 1),
        'graph': False,
        'key_length': 1,
        'support_partial_insertion': False,
        })
    applier = TestScenarioApplier()
    applier.scenarios = [scenario]
    adapt_tests(interface_suite, applier, standard_tests)
    return standard_tests
50
51
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
52
class TestGroupCompressor(tests.TestCase):
    """Tests for GroupCompressor"""

    def test_empty_delta(self):
        # A fresh compressor has produced no output lines yet.
        compressor = groupcompress.GroupCompressor(True)
        self.assertEqual([], compressor.lines)

    def test_one_nosha_delta(self):
        # diff against NUKK
        compressor = groupcompress.GroupCompressor(True)
        sha1, end_point, _, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # First text is stored as a fulltext: 'f', base128 length (0x0f = 15
        # bytes), then the raw content.
        expected_lines = [
            'f', '\x0f', 'strange\ncommon\n',
            ]
        self.assertEqual(expected_lines, compressor.lines)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def _chunks_to_repr_lines(self, chunks):
        # Join chunks into one string, then repr() each line so binary
        # delta bytes are printable in assertion failure output.
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))

    def test_two_nosha_delta(self):
        # Second text with a long common region should be stored as a delta
        # against the first.
        compressor = groupcompress.GroupCompressor(True)
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.lines)
        sha1_2, end_point, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x10',
            # source and target length
            '\x36\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = groupcompress.GroupCompressor(True)
        sha1_1, end_point, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.lines)
        sha1_3, end_point, _, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # NOTE: the literals below deliberately omit commas so adjacent
        # strings concatenate into single delta byte sequences.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # source and target length
            '\x67\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_stats(self):
        # After three overlapping texts the compression ratio should be
        # roughly 1.4 (input bytes / output bytes).
        compressor = groupcompress.GroupCompressor(True)
        compressor.compress(('label',), 'strange\ncommon long line\n'
                                        'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common long line\nplus more text\n'
                            'different\nmoredifferent\n', None)
        compressor.compress(('label3',),
                            'new\ncommon long line\nplus more text\n'
                            '\ndifferent\nmoredifferent\n', None)
        self.assertAlmostEqual(1.4, compressor.ratio(), 1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = groupcompress.GroupCompressor(True)
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.lines)
        sha1_2, end_point, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
            compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
170
171
0.25.3 by John Arbash Meinel
Add a encode/decode base128 functions.
172
class TestBase128Int(tests.TestCase):
    """Tests for the base-128 variable-length integer codec."""

    def assertEqualEncode(self, bytes, val):
        # Encoding val must produce exactly these bytes.
        self.assertEqual(bytes, groupcompress.encode_base128_int(val))

    def assertEqualDecode(self, val, num_decode, bytes):
        # Decoding must yield val and report num_decode bytes consumed.
        self.assertEqual((val, num_decode),
                         groupcompress.decode_base128_int(bytes))

    def test_encode(self):
        # (encoded bytes, integer) pairs covering one- to five-byte values.
        cases = [
            ('\x01', 1),
            ('\x02', 2),
            ('\x7f', 127),
            ('\x80\x01', 128),
            ('\xff\x01', 255),
            ('\x80\x02', 256),
            ('\xff\xff\xff\xff\x0f', 0xFFFFFFFF),
            ]
        for encoded, value in cases:
            self.assertEqualEncode(encoded, value)

    def test_decode(self):
        # (integer, bytes consumed, encoded bytes) triples mirroring
        # the encode cases above.
        cases = [
            (1, 1, '\x01'),
            (2, 1, '\x02'),
            (127, 1, '\x7f'),
            (128, 2, '\x80\x01'),
            (255, 2, '\xff\x01'),
            (256, 2, '\x80\x02'),
            (0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f'),
            ]
        for value, consumed, encoded in cases:
            self.assertEqualDecode(value, consumed, encoded)

    def test_decode_with_trailing_bytes(self):
        # Bytes after a terminated varint must be ignored, and the
        # reported consumed count must not include them.
        cases = [
            (1, 1, '\x01abcdef'),
            (127, 1, '\x7f\x01'),
            (128, 2, '\x80\x01abcdef'),
            (255, 2, '\xff\x01\xff'),
            ]
        for value, consumed, encoded in cases:
            self.assertEqualDecode(value, consumed, encoded)
204
205
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
206
class TestGroupCompressBlock(tests.TestCase):
    """Tests for the GroupCompressBlock serialized container format."""

    def test_from_empty_bytes(self):
        # An empty string is not a valid block and must be rejected.
        self.assertRaises(errors.InvalidGroupCompressBlock,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        # Smallest valid block: magic, zero compressed length, zero
        # meta-info length; yields a block with no entries.
        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertEqual({}, block._entries)

    def test_from_bytes(self):
        # Build a block by hand: magic line, two length lines, then the
        # zlib-compressed header listing two entries.
        z_header_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '76\n' # Length of zlib bytes
            '183\n' # Length of all meta-info
            + zlib.compress(
            'key:bing\n'
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
            'type:fulltext\n'
            'start:100\n'
            'length:100\n'
            '\n'
            'key:foo\x00bar\n'
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
            'type:fulltext\n'
            'start:0\n'
            'length:100\n'
            '\n'))
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_header_bytes)
        # Content is lazily loaded; parsing the header alone leaves it None.
        self.assertIs(None, block._content)
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
        bing = block._entries[('bing',)]
        self.assertEqual(('bing',), bing.key)
        self.assertEqual('fulltext', bing.type)
        self.assertEqual('abcd'*10, bing.sha1)
        self.assertEqual(100, bing.start)
        self.assertEqual(100, bing.length)
        # '\x00' in the serialized key separates tuple elements.
        foobar = block._entries[('foo', 'bar')]
        self.assertEqual(('foo', 'bar'), foobar.key)
        self.assertEqual('fulltext', foobar.type)
        self.assertEqual('abcd'*10, foobar.sha1)
        self.assertEqual(0, foobar.start)
        self.assertEqual(100, foobar.length)

    def test_add_entry(self):
        # add_entry returns the entry object with all fields populated.
        gcb = groupcompress.GroupCompressBlock()
        e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
        self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
        self.assertEqual(('foo', 'bar'), e.key)
        self.assertEqual('fulltext', e.type)
        self.assertEqual('abcd'*10, e.sha1)
        self.assertEqual(0, e.start)
        self.assertEqual(100, e.length)

    def test_to_bytes(self):
        gcb = groupcompress.GroupCompressBlock()
        gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
        gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
        bytes = gcb.to_bytes()
        self.assertStartsWith(bytes,
                              'gcb1z\n' # group compress block v1 zlib
                              '77\n' # Length of compressed bytes
                              '183\n' # Length of all meta-info
                             )
        # 13 = len('gcb1z\n77\n183\n'): skip the header to reach the
        # zlib payload. NOTE(review): tied to the '77' length above.
        remaining_bytes = bytes[13:]
        raw_bytes = zlib.decompress(remaining_bytes)
        # Entries serialize sorted by key, so 'bing' precedes 'foo\x00bar'.
        self.assertEqualDiff('key:bing\n'
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                             'type:fulltext\n'
                             'start:100\n'
                             'length:100\n'
                             '\n'
                             'key:foo\x00bar\n'
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                             'type:fulltext\n'
                             'start:0\n'
                             'length:100\n'
                             '\n', raw_bytes)