# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    groupcompress,
    errors,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):
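    """Base class providing helpers for comparing compressor output.

    Chunks are joined, split back into lines and passed through repr() so
    that non-ascii bytes can be compared readably in failure messages.
    """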

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


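# The two compressor implementations below are checked against exact output
# bytes.  As the inline comments in the individual tests describe, a fulltext
# record is 'f' followed by the content length and the raw bytes, while a
# delta record is 'd' followed by the delta length, the target length, copy
# commands such as '\x91' + offset + length, and insert commands giving a
# byte count followed by that many literal bytes.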
class TestPyrexGroupCompressor(TestGroupCompressor):
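    """Tests for the compiled (Pyrex) GroupCompressor, including exact output bytes."""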

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):
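    """Tests for the pure-Python GroupCompressor, including exact output bytes."""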

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):
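    """Direct tests for GroupCompressBlock serialization and decompression."""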

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we combine a sha hash with
        # compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
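    """Base class providing make_test_vf() to build group-compress versioned files."""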

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
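    """Tests for the group-compress VersionedFiles implementation."""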

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):
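    """Tests for _LazyGroupContentManager and its wire format."""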

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))