# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Tests for group compression."""
18
19
import zlib
20
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
21
from bzrlib import (
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
22
    btree_index,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
23
    groupcompress,
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
24
    errors,
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
25
    index as _mod_index,
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
26
    osutils,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
27
    tests,
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
28
    versionedfile,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
29
    )
0.23.58 by John Arbash Meinel
fix up the failing tests.
30
from bzrlib.osutils import sha_string
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
31
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
32
33
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
34
def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress.

    TestAllGroupCompressors is multiplied into one scenario per available
    compressor implementation: the pure-Python one always, plus the
    compiled (Pyrex/C) one when the extension is importable.
    """
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
45
46
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
47
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks into one string, split on newlines, and repr()
        # each line so non-ascii bytes are rendered printably in diffs.
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
63
64
65
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor.

    Run against every compressor implementation via load_tests.
    """

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        """A fresh compressor holds no chunks."""
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL: the first text is stored as a fulltext
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' marks a fulltext record, '\x0f' is its length (15 bytes)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        """pop_last() discards exactly the most recently added record."""
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
133
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
134
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
135
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the compiled (Pyrex/C) compressor."""

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
213
214
215
class TestPythonGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the pure-Python compressor.

    The expected delta bytes differ slightly from the Pyrex implementation
    (e.g. insert granularity), so these are kept as a separate class.
    """

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
292
293
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
294
class TestGroupCompressBlock(tests.TestCase):
    """Tests for the GroupCompressBlock serialization format."""

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping record keys to their text content.
        :return: A (locations, block) tuple, where locations maps each key
            to its (start, end) byte range within the block.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # labels_deltas values are (start, _, end, _) tuples; keep just the
        # (start, end) byte range for each key.
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
309
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
310
    def test_from_empty_bytes(self):
        """Parsing an empty string is rejected, not treated as an empty block."""
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')
313
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
314
    def test_from_minimal_bytes(self):
        """A header declaring zero-length content parses to an empty block."""
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        # Parsing is lazy: content is not expanded until _ensure_content().
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
324
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
325
    def test_from_invalid(self):
        """Bytes without the 'gcb1' magic are rejected with ValueError."""
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')
329
3735.38.4 by John Arbash Meinel
Another disk format change.
330
    def test_from_bytes(self):
        """Parse a hand-assembled header + zlib payload, then expand it."""
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        # Lazily parsed: compressed bytes held, plain content not yet expanded.
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        # Expanding keeps the compressed bytes and fills in the plain content.
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
348
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
349
    def test_to_bytes(self):
        """Serialized form is header + zlib payload; chunked input matches."""
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)
374
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
375
    def test_partial_decomp(self):
        """_ensure_content() decompresses only as much as is asked for."""
        chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            text = '%d\nThis is a bit of duplicate text\n' % (i,)
            chunks.append(text)
            chunks.append(osutils.sha_string(text) + '\n')
        content = ''.join(chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        have = len(block._content)
        block._ensure_content(have - 10)
        self.assertEqual(have, len(block._content))
        # Now we want a bit more content
        have += 10
        block._ensure_content(have)
        self.assertTrue(len(block._content) >= have)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
419
3735.32.11 by John Arbash Meinel
Add tests for the ability to do partial decompression without knowing the final length.
420
    def test_partial_decomp_no_known_length(self):
        """Partial decompression also works when _content_length is unknown."""
        chunks = []
        # Mix sha digests with compressible text so zlib has something to
        # decompress partially (pure repetition compresses too well).
        for i in xrange(2048):
            text = '%d\nThis is a bit of duplicate text\n' % (i,)
            chunks.append(text)
            chunks.append(osutils.sha_string(text) + '\n')
        content = ''.join(chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        have = len(block._content)
        block._ensure_content(have - 10)
        self.assertEqual(have, len(block._content))
        # Now we want a bit more content
        have += 10
        block._ensure_content(have)
        self.assertTrue(len(block._content) >= have)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
460
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
461
    def test__dump(self):
        """_dump() reports fulltext ('f') and delta ('d') entries."""
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        expected = [
            ('f', len(key_to_text[('1',)])),
            ('d', 21, len(key_to_text[('2',)]),
             [('c', 2, len(dup_content)),
              ('i', len('2 extra special\n'), ''),
              ]),
            ]
        self.assertEqual(expected, block._dump())
472
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
473
474
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
    """Base class providing a pack-backed GroupCompress versioned files."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        """Create a groupcompress VersionedFiles on a transport at *dir*."""
        t = self.get_transport(dir)
        t.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength)
        vf = factory(t)
        if do_cleanup:
            # Make sure the pack group is shut down when the test finishes.
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
485
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
486
487
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
    """Tests for GroupCompressVersionedFiles and its _GCGraphIndex."""

    def make_g_index(self, name, ref_lists=0, nodes=None):
        """Build and reopen a BTree graph index on the test transport.

        :param name: file name for the index on the transport.
        :param ref_lists: number of reference lists in the index.
        :param nodes: iterable of (node, references, value) tuples; defaults
            to no nodes. (A mutable default list was replaced by None to
            avoid the shared-mutable-default pitfall.)
        :return: a BTreeGraphIndex re-read from the written bytes.
        """
        if nodes is None:
            nodes = []
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """Return an index whose 'tip' references an absent 'missing-parent'."""
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def _grouped_stream(self, revision_ids, first_parents=()):
        """Yield fulltext records, each revision the parent of the next.

        Shared by the insert_record_stream tests; each record carries the
        same content except for the revision_id line, so they compress into
        one group.
        """
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                'some content that is\n'
                'identical except for\n'
                'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)

    def test_get_record_stream_as_requested(self):
        """'as-requested' ordering returns records in the keys' given order."""
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        """Copying records between VFs keeps the original compressed blocks."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        # a-d share one block, e-h another; the two blocks differ.
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        """With reuse_blocks=False everything is recompressed into one block."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        """Scanning an unvalidated index records its missing parents."""
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        """add_records() tracks parent keys not present in the index."""
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())
660
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
661
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
662
class TestLazyGroupCompress(tests.TestCaseWithTransport):
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
663
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
664
    # Fixture texts keyed by versionedfile key. key4's text repeats key3's
    # text verbatim, so most of key4's bytes can be expressed as references
    # to earlier content when the block is compressed.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
676
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: dict mapping a key tuple to its text.
        :return: (locations, block) where locations maps each key to its
            (start, end) byte range inside the block, and block is a
            GroupCompressBlock round-tripped through its serialised form.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # Keep only the (start, end) byte range from each labels_deltas
        # entry; the other two fields are not needed by the tests.
        # (The previous version bound an unused local 'start = 0' here.)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        # Round-trip through the serialised bytes, as real readers would.
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
687
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
688
    def add_key_to_manager(self, key, locations, block, manager):
        """Register *key*'s byte range from *locations* with *manager*.

        *block* is accepted for call-site symmetry but not used here.
        """
        offsets = locations[key]
        manager.add_factory(key, (), offsets[0], offsets[1])
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
691
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
692
    def test_get_fulltexts(self):
        """Records stream back in the order their keys were registered."""
        locations, block = self.make_block(self._texts)

        def extract_in_order(keys):
            # Build a fresh manager with *keys* added in order and return
            # the record order seen, asserting each fulltext on the way.
            manager = groupcompress._LazyGroupContentManager(block)
            for key in keys:
                self.add_key_to_manager(key, locations, block, manager)
            seen = []
            for record in manager.get_record_stream():
                seen.append(record.key)
                self.assertEqual(self._texts[record.key],
                                 record.get_bytes_as('fulltext'))
            return seen

        self.assertEqual([('key1',), ('key2',)],
                         extract_in_order([('key1',), ('key2',)]))
        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        self.assertEqual([('key2',), ('key1',)],
                         extract_in_order([('key2',), ('key1',)]))
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
716
    def test__wire_bytes_no_keys(self):
        """With no keys requested, _wire_bytes() strips the block first."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        expected = ('groupcompress-block\n'
                    '8\n' # len(compress(''))
                    '0\n' # len('')
                    '%d\n'# compressed block len
                    '%s'  # zheader
                    '%s'  # block
                    % (len(stripped_block), empty_z_header,
                       stripped_block))
        self.assertEqual(expected, wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
734
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
735
    def test__wire_bytes(self):
        """_wire_bytes() is: kind, lengths, zlib'd key header, then block."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        # Leading fields are newline separated; the remainder is binary.
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        # The header lists each key with its parents line and byte range.
        expected_header = ('key1\n'
                           '\n'  # no parents
                           '%d\n' # start offset
                           '%d\n' # end offset
                           'key4\n'
                           '\n'
                           '%d\n'
                           '%d\n'
                           % (entry1[0], entry1[1],
                              entry4[0], entry4[1]))
        self.assertEqualDiff(expected_header, header)
        # Everything after the compressed header is the block itself.
        self.assertEqual(block_bytes, rest[z_header_len:])
769
770
    def test_from_bytes(self):
        """from_bytes() rebuilds a manager that serves the same records."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        # Reconstruct a manager purely from the wire form.
        rebuilt = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(rebuilt, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(rebuilt._factories))
        self.assertEqual(block._z_content, rebuilt._block._z_content)
        seen = []
        for record in rebuilt.get_record_stream():
            seen.append(record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], seen)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
787
788
    def test__check_rebuild_no_changes(self):
        """Requesting every key means no rebuild: the block is kept as-is."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        for key in [('key1',), ('key2',), ('key3',), ('key4',)]:
            self.add_key_to_manager(key, locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)
798
799
    def test__check_rebuild_only_one(self):
        """Requesting only the first key strips the block down."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        rebuilt = manager._block
        self.assertIsNot(block, rebuilt)
        self.assertTrue(block._content_length > rebuilt._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
813
814
    def test__check_rebuild_middle(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
815
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
816
        manager = groupcompress._LazyGroupContentManager(block)
817
        # Request a small key in the middle should trigger a 'rebuild'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
818
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
819
        manager._check_rebuild_block()
820
        self.assertIsNot(block, manager._block)
821
        self.assertTrue(block._content_length > manager._block._content_length)
822
        for record in manager.get_record_stream():
823
            self.assertEqual(('key4',), record.key)
824
            self.assertEqual(self._texts[record.key],
825
                             record.get_bytes_as('fulltext'))