/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
1
# groupcompress, a bzr plugin providing new compression logic.
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Tests for group compression."""
19
20
import zlib
21
22
from bzrlib import tests
0.23.58 by John Arbash Meinel
fix up the failing tests.
23
from bzrlib.osutils import sha_string
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
24
from bzrlib.plugins.groupcompress import errors, groupcompress
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
25
from bzrlib.tests import (
26
    TestCaseWithTransport,
0.17.42 by Robert Collins
Handle bzr.dev changes.
27
    multiply_tests,
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
28
    )
29
from bzrlib.transport import get_transport
30
31
32
def load_tests(standard_tests, module, loader):
    """Multiply the VersionedFiles interface tests across gc scenarios.

    Loads bzrlib's generic VersionedFiles interface tests and runs them
    against a groupcompress-backed pack factory (no graph, key length 1).
    """
    from bzrlib.tests.test_versionedfile import TestVersionedFiles
    interface_suite = loader.loadTestsFromTestCase(TestVersionedFiles)
    scenario = (
        'groupcompressrabin-nograph',
        {'cleanup': groupcompress.cleanup_pack_group,
         'factory': groupcompress.make_pack_factory(False, False, 1),
         'graph': False,
         'key_length': 1,
         'support_partial_insertion': False,
         })
    return multiply_tests(interface_suite, [scenario], standard_tests)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
47
48
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
49
class TestGroupCompressor(tests.TestCase):
    """Tests for GroupCompressor"""

    def test_empty_delta(self):
        # A fresh compressor has produced no output chunks yet.
        comp = groupcompress.GroupCompressor(True)
        self.assertEqual([], comp.lines)

    def test_one_nosha_delta(self):
        # The first text has nothing to delta against, so it is stored as a
        # fulltext record.
        comp = groupcompress.GroupCompressor(True)
        sha1, end_point, _, _ = comp.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' marks a fulltext; '\x0f' is its length (15 bytes).
        expected = ['f', '\x0f', 'strange\ncommon\n']
        self.assertEqual(expected, comp.lines)
        self.assertEqual(sum(map(len, expected)), end_point)

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks, split back into lines, and repr() each line so
        # non-ascii bytes show up readably in assertion diffs.
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))

    def test_two_nosha_delta(self):
        # A second text sharing a long run of bytes should be stored as a
        # delta against the stream so far.
        comp = groupcompress.GroupCompressor(True)
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected = list(comp.lines)
        sha1_2, end_point, _, _ = comp.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected.extend([
            'd\x10',            # 'delta' marker, delta length
            '\x36\x36',         # source length and target length
            '\x91\x0a\x2c',     # copy instruction: offset 0x0a, len 0x2c
            '\x0adifferent\n',  # insert 10 bytes: 'different\n'
            ])
        self.assertEqualDiffEncoded(expected, comp.lines)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        comp = groupcompress.GroupCompressor(True)
        sha1_1, end_point, _, _ = comp.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = comp.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected = list(comp.lines)
        sha1_3, end_point, _, _ = comp.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected.extend([
            'd\x0c',        # 'delta' marker, delta length
            # source length, target length, then insert 3 bytes ('new')
            '\x67\x5f'
            '\x03new',
            # copy of first parent 'common' range: offset 0x09, 0x31 bytes,
            # then copy of second parent 'different' range: offset 0x3c,
            # 0x2b bytes (implicit string concatenation, as one chunk)
            '\x91\x09\x31'
            '\x91\x3c\x2b'
            ])
        self.assertEqualDiffEncoded(expected, comp.lines)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_stats(self):
        # Three overlapping texts should compress to roughly 1.4:1.
        comp = groupcompress.GroupCompressor(True)
        comp.compress(('label',), 'strange\ncommon long line\n'
                                  'plus more text\n', None)
        comp.compress(('newlabel',),
                      'common long line\nplus more text\n'
                      'different\nmoredifferent\n', None)
        comp.compress(('label3',),
                      'new\ncommon long line\nplus more text\n'
                      '\ndifferent\nmoredifferent\n', None)
        self.assertAlmostEqual(1.4, comp.ratio(), 1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        comp = groupcompress.GroupCompressor(True)
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected = list(comp.lines)
        sha1_2, end_point, _, _ = comp.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # The first text round-trips out of the in-progress stream...
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
            comp.extract(('label',)))
        # ...and so does the second.
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         comp.extract(('newlabel',)))
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
167
168
0.25.3 by John Arbash Meinel
Add a encode/decode base128 functions.
169
class TestBase128Int(tests.TestCase):
    """Tests for the base-128 variable-length integer encoding."""

    def assertEqualEncode(self, bytes, val):
        # encode_base128_int(val) must produce exactly `bytes`.
        self.assertEqual(bytes, groupcompress.encode_base128_int(val))

    def assertEqualDecode(self, val, num_decode, bytes):
        # decode_base128_int returns (value, number of bytes consumed).
        self.assertEqual((val, num_decode),
                         groupcompress.decode_base128_int(bytes))

    def test_encode(self):
        for bytes, val in [('\x01', 1),
                           ('\x02', 2),
                           ('\x7f', 127),
                           ('\x80\x01', 128),
                           ('\xff\x01', 255),
                           ('\x80\x02', 256),
                           ('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)]:
            self.assertEqualEncode(bytes, val)

    def test_decode(self):
        for val, count, bytes in [(1, 1, '\x01'),
                                  (2, 1, '\x02'),
                                  (127, 1, '\x7f'),
                                  (128, 2, '\x80\x01'),
                                  (255, 2, '\xff\x01'),
                                  (256, 2, '\x80\x02'),
                                  (0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')]:
            self.assertEqualDecode(val, count, bytes)

    def test_decode_with_trailing_bytes(self):
        # Decoding stops at the terminating byte: trailing garbage is
        # ignored and not included in the consumed-bytes count.
        for val, count, bytes in [(1, 1, '\x01abcdef'),
                                  (127, 1, '\x7f\x01'),
                                  (128, 2, '\x80\x01abcdef'),
                                  (255, 2, '\xff\x01\xff')]:
            self.assertEqualDecode(val, count, bytes)
201
202
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
203
class TestGroupCompressBlock(tests.TestCase):
    """Tests for parsing and serialising GroupCompressBlock headers."""

    def test_from_empty_bytes(self):
        # An empty string is not a valid serialised block.
        self.assertRaises(errors.InvalidGroupCompressBlock,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        # Smallest legal block: magic line plus two zero lengths.
        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertEqual({}, block._entries)

    def test_from_bytes(self):
        # Two entries ('bing' and 'foo\x00bar'), each terminated by a blank
        # line, zlib-compressed behind the three header lines.
        header = ('key:bing\n'
                  'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                  'type:fulltext\n'
                  'start:100\n'
                  'length:100\n'
                  '\n'
                  'key:foo\x00bar\n'
                  'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                  'type:fulltext\n'
                  'start:0\n'
                  'length:100\n'
                  '\n')
        z_header_bytes = ('gcb1z\n'  # group compress block v1 zlib
                          '76\n'     # length of the zlib bytes
                          '183\n'    # length of all meta-info
                          + zlib.compress(header))
        block = groupcompress.GroupCompressBlock.from_bytes(z_header_bytes)
        self.assertIs(None, block._content)
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
        bing = block._entries[('bing',)]
        self.assertEqual(('bing',), bing.key)
        self.assertEqual('fulltext', bing.type)
        self.assertEqual('abcd'*10, bing.sha1)
        self.assertEqual(100, bing.start)
        self.assertEqual(100, bing.length)
        foobar = block._entries[('foo', 'bar')]
        self.assertEqual(('foo', 'bar'), foobar.key)
        self.assertEqual('fulltext', foobar.type)
        self.assertEqual('abcd'*10, foobar.sha1)
        self.assertEqual(0, foobar.start)
        self.assertEqual(100, foobar.length)

    def test_add_entry(self):
        gcb = groupcompress.GroupCompressBlock()
        entry = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
        # add_entry hands back the entry object it recorded.
        self.assertIsInstance(entry, groupcompress.GroupCompressBlockEntry)
        self.assertEqual(('foo', 'bar'), entry.key)
        self.assertEqual('fulltext', entry.type)
        self.assertEqual('abcd'*10, entry.sha1)
        self.assertEqual(0, entry.start)
        self.assertEqual(100, entry.length)

    def test_to_bytes(self):
        gcb = groupcompress.GroupCompressBlock()
        gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
        gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
        bytes = gcb.to_bytes()
        self.assertStartsWith(bytes,
                              'gcb1z\n'  # group compress block v1 zlib
                              '77\n'     # length of compressed bytes
                              '183\n')   # length of all meta-info
        # Everything after the three header lines (13 bytes) is zlib data;
        # entries are serialised sorted by key.
        raw_bytes = zlib.decompress(bytes[13:])
        self.assertEqualDiff('key:bing\n'
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                             'type:fulltext\n'
                             'start:100\n'
                             'length:100\n'
                             '\n'
                             'key:foo\x00bar\n'
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
                             'type:fulltext\n'
                             'start:0\n'
                             'length:100\n'
                             '\n', raw_bytes)