/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1
# Copyright (C) 2008, 2009 Canonical Ltd
2
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
16
17
"""Tests for group compression."""
18
19
import zlib
20
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
21
from bzrlib import (
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
22
    btree_index,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
23
    groupcompress,
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
24
    errors,
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
25
    index as _mod_index,
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
26
    osutils,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
27
    tests,
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
28
    versionedfile,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
29
    )
0.23.58 by John Arbash Meinel
fix up the failing tests.
30
from bzrlib.osutils import sha_string
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
31
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
32
33
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
34
def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress.

    TestAllGroupCompressors is multiplied into one scenario per available
    compressor implementation (pure-Python always; C when compiled).
    """
    suite_to_adapt, remaining = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [('python', {'compressor': groupcompress.PythonGroupCompressor})]
    if CompiledGroupCompressFeature.available():
        scenarios.append(
            ('C', {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(suite_to_adapt, scenarios, remaining)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
45
46
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
47
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join all chunks into one string, split back into lines, and repr()
        # each line so non-ascii bytes show up readably in failure diffs.
        joined = ''.join(chunks)
        return '\n'.join(repr(line) for line in joined.split('\n'))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
63
64
65
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    # Filled in per-scenario by load_tests/multiply_tests with either the
    # Python or the Pyrex implementation.
    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A freshly created compressor has emitted no chunks yet.
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL: the very first text has nothing to delta against
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' record marker, then '\x0f' (15 == len of the text), then the
        # raw fulltext bytes.
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        # The null record occupies zero bytes and does not move the endpoint.
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        # pop_last() should discard exactly the most recent compress() call,
        # restoring the chunk stream to its prior state.
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
133
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
134
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
135
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the compiled (Pyrex/C) compressor."""

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        # Three heavily overlapping texts should compress roughly 1.9:1;
        # checked to one decimal place only, as exact output may vary.
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        # The second text must come out as a delta against the first.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # The third text is expected to copy ranges out of both earlier texts.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
213
214
215
class TestPythonGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the pure-Python compressor.

    Note: the Python implementation emits slightly different (but equivalent)
    delta bytes than the Pyrex one, hence the separate expected values here.
    """

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        # Three heavily overlapping texts should compress roughly 1.9:1.
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        # The second text must come out as a delta against the first.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # The third text is expected to copy ranges out of both earlier texts.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
292
293
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
294
class TestGroupCompressBlock(tests.TestCase):
295
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
296
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping record key -> text bytes; texts
            are added in sorted-key order.
        :return: A tuple (locs, block). locs maps each key to its
            (start, end) byte range inside the block; block is a
            GroupCompressBlock rebuilt from serialized bytes.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # Removed a dead 'start = 0' assignment: the generator expression
        # below binds its own 'start', so the outer one was never read.
        # labels_deltas values unpack as (start, _, end, _); we keep only the
        # (start, end) offsets per key.
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
309
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
310
    def test_from_empty_bytes(self):
        # An empty string is not a valid serialized block.
        self.assertRaises(
            ValueError, groupcompress.GroupCompressBlock.from_bytes, '')
313
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
314
    def test_from_minimal_bytes(self):
        # Smallest valid serialization: magic, zero compressed length, zero
        # uncompressed length, and no payload.
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        # Content is decompressed lazily; nothing is expanded yet.
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
324
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
325
    def test_from_invalid(self):
        # Bytes that do not start with the block magic must be rejected.
        self.assertRaises(
            ValueError,
            groupcompress.GroupCompressBlock.from_bytes,
            'this is not a valid header')
329
3735.38.4 by John Arbash Meinel
Another disk format change.
330
    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        # Parsing records the compressed bytes and lengths, but does not
        # decompress until _ensure_content() is called.
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        # The compressed form is retained alongside the expanded content.
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
348
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
349
    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        # Serializing must record consistent compressed/uncompressed lengths.
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        # Everything after the header is the zlib-compressed payload.
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
365
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
366
    def test_partial_decomp(self):
367
        content_chunks = []
368
        # We need a sufficient amount of data so that zlib.decompress has
369
        # partial decompression to work with. Most auto-generated data
370
        # compresses a bit too well, we want a combination, so we combine a sha
371
        # hash with compressible data.
372
        for i in xrange(2048):
373
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
374
            content_chunks.append(next_content)
375
            next_sha1 = osutils.sha_string(next_content)
376
            content_chunks.append(next_sha1 + '\n')
377
        content = ''.join(content_chunks)
378
        self.assertEqual(158634, len(content))
379
        z_content = zlib.compress(content)
380
        self.assertEqual(57182, len(z_content))
381
        block = groupcompress.GroupCompressBlock()
382
        block._z_content = z_content
383
        block._z_content_length = len(z_content)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
384
        block._compressor_name = 'zlib'
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
385
        block._content_length = 158634
386
        self.assertIs(None, block._content)
387
        block._ensure_content(100)
388
        self.assertIsNot(None, block._content)
389
        # We have decompressed at least 100 bytes
390
        self.assertTrue(len(block._content) >= 100)
391
        # We have not decompressed the whole content
392
        self.assertTrue(len(block._content) < 158634)
393
        self.assertEqualDiff(content[:len(block._content)], block._content)
394
        # ensuring content that we already have shouldn't cause any more data
395
        # to be extracted
396
        cur_len = len(block._content)
397
        block._ensure_content(cur_len - 10)
398
        self.assertEqual(cur_len, len(block._content))
399
        # Now we want a bit more content
400
        cur_len += 10
401
        block._ensure_content(cur_len)
402
        self.assertTrue(len(block._content) >= cur_len)
403
        self.assertTrue(len(block._content) < 158634)
404
        self.assertEqualDiff(content[:len(block._content)], block._content)
405
        # And now lets finish
406
        block._ensure_content(158634)
407
        self.assertEqualDiff(content, block._content)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
408
        # And the decompressor is finalized
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
409
        self.assertIs(None, block._z_content_decompressor)
410
3735.32.11 by John Arbash Meinel
Add tests for the ability to do partial decompression without knowing the final length.
411
    def test_partial_decomp_no_known_length(self):
412
        content_chunks = []
413
        for i in xrange(2048):
414
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
415
            content_chunks.append(next_content)
416
            next_sha1 = osutils.sha_string(next_content)
417
            content_chunks.append(next_sha1 + '\n')
418
        content = ''.join(content_chunks)
419
        self.assertEqual(158634, len(content))
420
        z_content = zlib.compress(content)
421
        self.assertEqual(57182, len(z_content))
422
        block = groupcompress.GroupCompressBlock()
423
        block._z_content = z_content
424
        block._z_content_length = len(z_content)
425
        block._compressor_name = 'zlib'
426
        block._content_length = None # Don't tell the decompressed length
427
        self.assertIs(None, block._content)
428
        block._ensure_content(100)
429
        self.assertIsNot(None, block._content)
430
        # We have decompressed at least 100 bytes
431
        self.assertTrue(len(block._content) >= 100)
432
        # We have not decompressed the whole content
433
        self.assertTrue(len(block._content) < 158634)
434
        self.assertEqualDiff(content[:len(block._content)], block._content)
435
        # ensuring content that we already have shouldn't cause any more data
436
        # to be extracted
437
        cur_len = len(block._content)
438
        block._ensure_content(cur_len - 10)
439
        self.assertEqual(cur_len, len(block._content))
440
        # Now we want a bit more content
441
        cur_len += 10
442
        block._ensure_content(cur_len)
443
        self.assertTrue(len(block._content) >= cur_len)
444
        self.assertTrue(len(block._content) < 158634)
445
        self.assertEqualDiff(content[:len(block._content)], block._content)
446
        # And now lets finish
447
        block._ensure_content()
448
        self.assertEqualDiff(content, block._content)
449
        # And the decompressor is finalized
450
        self.assertIs(None, block._z_content_decompressor)
451
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
452
    def test__dump(self):
453
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
454
        key_to_text = {('1',): dup_content + '1 unique\n',
455
                       ('2',): dup_content + '2 extra special\n'}
456
        locs, block = self.make_block(key_to_text)
457
        self.assertEqual([('f', len(key_to_text[('1',)])),
458
                          ('d', 21, len(key_to_text[('2',)]),
459
                           [('c', 2, len(dup_content)),
460
                            ('i', len('2 extra special\n'), '')
461
                           ]),
462
                         ], block._dump())
463
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
464
465
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
466
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
467
    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
468
                     dir='.'):
469
        t = self.get_transport(dir)
470
        t.ensure_base()
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
471
        vf = groupcompress.make_pack_factory(graph=create_graph,
472
            delta=False, keylength=keylength)(t)
473
        if do_cleanup:
474
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
475
        return vf
476
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
477
478
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
479
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
480
    def make_g_index(self, name, ref_lists=0, nodes=[]):
481
        builder = btree_index.BTreeBuilder(ref_lists)
482
        for node, references, value in nodes:
483
            builder.add_node(node, references, value)
484
        stream = builder.finish()
485
        trans = self.get_transport()
486
        size = trans.put_file(name, stream)
487
        return btree_index.BTreeGraphIndex(trans, name, size)
488
489
    def make_g_index_missing_parent(self):
490
        graph_index = self.make_g_index('missing_parent', 1,
491
            [(('parent', ), '2 78 2 10', ([],)),
492
             (('tip', ), '2 78 2 10',
493
              ([('parent', ), ('missing-parent', )],)),
494
              ])
495
        return graph_index
496
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
497
    def test_get_record_stream_as_requested(self):
498
        # Consider promoting 'as-requested' to general availability, and
499
        # make this a VF interface test
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
500
        vf = self.make_test_vf(False, dir='source')
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
501
        vf.add_lines(('a',), (), ['lines\n'])
502
        vf.add_lines(('b',), (), ['lines\n'])
503
        vf.add_lines(('c',), (), ['lines\n'])
504
        vf.add_lines(('d',), (), ['lines\n'])
505
        vf.writer.end()
506
        keys = [record.key for record in vf.get_record_stream(
507
                    [('a',), ('b',), ('c',), ('d',)],
508
                    'as-requested', False)]
509
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
510
        keys = [record.key for record in vf.get_record_stream(
511
                    [('b',), ('a',), ('d',), ('c',)],
512
                    'as-requested', False)]
513
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
514
515
        # It should work even after being repacked into another VF
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
516
        vf2 = self.make_test_vf(False, dir='target')
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
517
        vf2.insert_record_stream(vf.get_record_stream(
518
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
519
        vf2.writer.end()
520
521
        keys = [record.key for record in vf2.get_record_stream(
522
                    [('a',), ('b',), ('c',), ('d',)],
523
                    'as-requested', False)]
524
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
525
        keys = [record.key for record in vf2.get_record_stream(
526
                    [('b',), ('a',), ('d',), ('c',)],
527
                    'as-requested', False)]
528
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
529
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
530
    def test_insert_record_stream_re_uses_blocks(self):
531
        vf = self.make_test_vf(True, dir='source')
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
532
        def grouped_stream(revision_ids, first_parents=()):
533
            parents = first_parents
534
            for revision_id in revision_ids:
535
                key = (revision_id,)
536
                record = versionedfile.FulltextContentFactory(
537
                    key, parents, None,
538
                    'some content that is\n'
539
                    'identical except for\n'
540
                    'revision_id:%s\n' % (revision_id,))
541
                yield record
542
                parents = (key,)
543
        # One group, a-d
544
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
545
        # Second group, e-h
546
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
547
                                               first_parents=(('d',),)))
548
        block_bytes = {}
549
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
550
                                      'unordered', False)
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
551
        num_records = 0
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
552
        for record in stream:
553
            if record.key in [('a',), ('e',)]:
554
                self.assertEqual('groupcompress-block', record.storage_kind)
555
            else:
556
                self.assertEqual('groupcompress-block-ref',
557
                                 record.storage_kind)
558
            block_bytes[record.key] = record._manager._block._z_content
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
559
            num_records += 1
560
        self.assertEqual(8, num_records)
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
561
        for r in 'abcd':
562
            key = (r,)
563
            self.assertIs(block_bytes[key], block_bytes[('a',)])
564
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
565
        for r in 'efgh':
566
            key = (r,)
567
            self.assertIs(block_bytes[key], block_bytes[('e',)])
568
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
569
        # Now copy the blocks into another vf, and ensure that the blocks are
570
        # preserved without creating new entries
571
        vf2 = self.make_test_vf(True, dir='target')
572
        # ordering in 'groupcompress' order, should actually swap the groups in
573
        # the target vf, but the groups themselves should not be disturbed.
574
        vf2.insert_record_stream(vf.get_record_stream(
575
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
576
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
577
                                       'groupcompress', False)
578
        vf2.writer.end()
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
579
        num_records = 0
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
580
        for record in stream:
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
581
            num_records += 1
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
582
            self.assertEqual(block_bytes[record.key],
583
                             record._manager._block._z_content)
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
584
        self.assertEqual(8, num_records)
585
586
    def test__insert_record_stream_no_reuse_block(self):
587
        vf = self.make_test_vf(True, dir='source')
588
        def grouped_stream(revision_ids, first_parents=()):
589
            parents = first_parents
590
            for revision_id in revision_ids:
591
                key = (revision_id,)
592
                record = versionedfile.FulltextContentFactory(
593
                    key, parents, None,
594
                    'some content that is\n'
595
                    'identical except for\n'
596
                    'revision_id:%s\n' % (revision_id,))
597
                yield record
598
                parents = (key,)
599
        # One group, a-d
600
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
601
        # Second group, e-h
602
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
603
                                               first_parents=(('d',),)))
604
        vf.writer.end()
605
        self.assertEqual(8, len(list(vf.get_record_stream(
606
                                        [(r,) for r in 'abcdefgh'],
607
                                        'unordered', False))))
608
        # Now copy the blocks into another vf, and ensure that the blocks are
609
        # preserved without creating new entries
610
        vf2 = self.make_test_vf(True, dir='target')
611
        # ordering in 'groupcompress' order, should actually swap the groups in
612
        # the target vf, but the groups themselves should not be disturbed.
613
        list(vf2._insert_record_stream(vf.get_record_stream(
614
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
615
            reuse_blocks=False))
616
        vf2.writer.end()
617
        # After inserting with reuse_blocks=False, we should have everything in
618
        # a single new block.
619
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
620
                                       'groupcompress', False)
621
        block = None
622
        for record in stream:
623
            if block is None:
624
                block = record._manager._block
625
            else:
626
                self.assertIs(block, record._manager._block)
627
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
628
    def test_add_missing_noncompression_parent_unvalidated_index(self):
629
        unvalidated = self.make_g_index_missing_parent()
630
        combined = _mod_index.CombinedGraphIndex([unvalidated])
631
        index = groupcompress._GCGraphIndex(combined,
4343.3.21 by John Arbash Meinel
Implement get_missing_parents in terms of _KeyRefs.
632
            is_locked=lambda: True, parents=True,
633
            track_external_parent_refs=True)
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
634
        index.scan_unvalidated_index(unvalidated)
635
        self.assertEqual(
636
            frozenset([('missing-parent',)]), index.get_missing_parents())
637
638
    def test_track_external_parent_refs(self):
639
        g_index = self.make_g_index('empty', 1, [])
640
        mod_index = btree_index.BTreeBuilder(1, 1)
641
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
642
        index = groupcompress._GCGraphIndex(combined,
643
            is_locked=lambda: True, parents=True,
4343.3.21 by John Arbash Meinel
Implement get_missing_parents in terms of _KeyRefs.
644
            add_callback=mod_index.add_nodes,
645
            track_external_parent_refs=True)
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
646
        index.add_records([
647
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
648
        self.assertEqual(
649
            frozenset([('parent-1',), ('parent-2',)]),
650
            index.get_missing_parents())
651
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
652
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
653
class TestLazyGroupCompress(tests.TestCaseWithTransport):
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
654
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
655
    _texts = {
656
        ('key1',): "this is a text\n"
657
                   "with a reasonable amount of compressible bytes\n",
658
        ('key2',): "another text\n"
659
                   "with a reasonable amount of compressible bytes\n",
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
660
        ('key3',): "yet another text which won't be extracted\n"
661
                   "with a reasonable amount of compressible bytes\n",
662
        ('key4',): "this will be extracted\n"
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
663
                   "but references most of its bytes from\n"
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
664
                   "yet another text which won't be extracted\n"
665
                   "with a reasonable amount of compressible bytes\n",
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
666
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
667
    def make_block(self, key_to_text):
668
        """Create a GroupCompressBlock, filling it with the given texts."""
669
        compressor = groupcompress.GroupCompressor()
670
        start = 0
671
        for key in sorted(key_to_text):
672
            compressor.compress(key, key_to_text[key], None)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
673
        locs = dict((key, (start, end)) for key, (start, _, end, _)
674
                    in compressor.labels_deltas.iteritems())
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
675
        block = compressor.flush()
676
        raw_bytes = block.to_bytes()
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
677
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
678
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
679
    def add_key_to_manager(self, key, locations, block, manager):
680
        start, end = locations[key]
681
        manager.add_factory(key, (), start, end)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
682
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
683
    def test_get_fulltexts(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
684
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
685
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
686
        self.add_key_to_manager(('key1',), locations, block, manager)
687
        self.add_key_to_manager(('key2',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
688
        result_order = []
689
        for record in manager.get_record_stream():
690
            result_order.append(record.key)
691
            text = self._texts[record.key]
692
            self.assertEqual(text, record.get_bytes_as('fulltext'))
693
        self.assertEqual([('key1',), ('key2',)], result_order)
694
695
        # If we build the manager in the opposite order, we should get them
696
        # back in the opposite order
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
697
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
698
        self.add_key_to_manager(('key2',), locations, block, manager)
699
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
700
        result_order = []
701
        for record in manager.get_record_stream():
702
            result_order.append(record.key)
703
            text = self._texts[record.key]
704
            self.assertEqual(text, record.get_bytes_as('fulltext'))
705
        self.assertEqual([('key2',), ('key1',)], result_order)
706
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
707
    def test__wire_bytes_no_keys(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
708
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
709
        manager = groupcompress._LazyGroupContentManager(block)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
710
        wire_bytes = manager._wire_bytes()
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
711
        block_length = len(block.to_bytes())
3735.32.24 by John Arbash Meinel
_wire_bytes() now strips groups as necessary, as does _insert_record_stream
712
        # We should have triggered a strip, since we aren't using any content
713
        stripped_block = manager._block.to_bytes()
714
        self.assertTrue(block_length > len(stripped_block))
715
        empty_z_header = zlib.compress('')
716
        self.assertEqual('groupcompress-block\n'
717
                         '8\n' # len(compress(''))
718
                         '0\n' # len('')
719
                         '%d\n'# compressed block len
720
                         '%s'  # zheader
721
                         '%s'  # block
722
                         % (len(stripped_block), empty_z_header,
723
                            stripped_block),
724
                         wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
725
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
726
    def test__wire_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
727
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
728
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
729
        self.add_key_to_manager(('key1',), locations, block, manager)
730
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
731
        block_bytes = block.to_bytes()
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
732
        wire_bytes = manager._wire_bytes()
733
        (storage_kind, z_header_len, header_len,
734
         block_len, rest) = wire_bytes.split('\n', 4)
735
        z_header_len = int(z_header_len)
736
        header_len = int(header_len)
737
        block_len = int(block_len)
738
        self.assertEqual('groupcompress-block', storage_kind)
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
739
        self.assertEqual(33, z_header_len)
740
        self.assertEqual(25, header_len)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
741
        self.assertEqual(len(block_bytes), block_len)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
742
        z_header = rest[:z_header_len]
743
        header = zlib.decompress(z_header)
744
        self.assertEqual(header_len, len(header))
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
745
        entry1 = locations[('key1',)]
746
        entry4 = locations[('key4',)]
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
747
        self.assertEqualDiff('key1\n'
748
                             '\n'  # no parents
749
                             '%d\n' # start offset
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
750
                             '%d\n' # end offset
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
751
                             'key4\n'
752
                             '\n'
753
                             '%d\n'
754
                             '%d\n'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
755
                             % (entry1[0], entry1[1],
756
                                entry4[0], entry4[1]),
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
757
                            header)
758
        z_block = rest[z_header_len:]
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
759
        self.assertEqual(block_bytes, z_block)
760
761
    def test_from_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
762
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
763
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
764
        self.add_key_to_manager(('key1',), locations, block, manager)
765
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
766
        wire_bytes = manager._wire_bytes()
767
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
3735.32.18 by John Arbash Meinel
We now support generating a network stream.
768
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
769
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
770
        self.assertEqual(2, len(manager._factories))
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
771
        self.assertEqual(block._z_content, manager._block._z_content)
772
        result_order = []
773
        for record in manager.get_record_stream():
774
            result_order.append(record.key)
775
            text = self._texts[record.key]
776
            self.assertEqual(text, record.get_bytes_as('fulltext'))
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
777
        self.assertEqual([('key1',), ('key4',)], result_order)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
778
779
    def test__check_rebuild_no_changes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
780
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
781
        manager = groupcompress._LazyGroupContentManager(block)
782
        # Request all the keys, which ensures that we won't rebuild
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
783
        self.add_key_to_manager(('key1',), locations, block, manager)
784
        self.add_key_to_manager(('key2',), locations, block, manager)
785
        self.add_key_to_manager(('key3',), locations, block, manager)
786
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
787
        manager._check_rebuild_block()
788
        self.assertIs(block, manager._block)
789
790
    def test__check_rebuild_only_one(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
791
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
792
        manager = groupcompress._LazyGroupContentManager(block)
793
        # Request just the first key, which should trigger a 'strip' action
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
794
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
795
        manager._check_rebuild_block()
796
        self.assertIsNot(block, manager._block)
797
        self.assertTrue(block._content_length > manager._block._content_length)
798
        # We should be able to still get the content out of this block, though
799
        # it should only have 1 entry
800
        for record in manager.get_record_stream():
801
            self.assertEqual(('key1',), record.key)
802
            self.assertEqual(self._texts[record.key],
803
                             record.get_bytes_as('fulltext'))
804
805
    def test__check_rebuild_middle(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
806
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
807
        manager = groupcompress._LazyGroupContentManager(block)
808
        # Request a small key in the middle should trigger a 'rebuild'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
809
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
810
        manager._check_rebuild_block()
811
        self.assertIsNot(block, manager._block)
812
        self.assertTrue(block._content_length > manager._block._content_length)
813
        for record in manager.get_record_stream():
814
            self.assertEqual(('key4',), record.key)
815
            self.assertEqual(self._texts[record.key],
816
                             record.get_bytes_as('fulltext'))