# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""
18
19
import zlib

from bzrlib import (
    btree_index,
    errors,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
33
34
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
35
def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress.

    Every TestAllGroupCompressors test is multiplied so that it runs once
    per available GroupCompressor implementation (pure Python, and the
    compiled Pyrex one when it is built).
    """
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    # The C implementation is optional; only test it when compiled.
    if CompiledGroupCompressFeature.available():
        scenarios.append(
            ('C', {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
46
47
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
48
class TestGroupCompressor(tests.TestCase):
    """Base class with helpers for comparing compressor chunk output."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks into a single string, split it back into lines,
        # and repr() each line so non-ascii bytes stay printable in a diff.
        text = ''.join(chunks)
        return '\n'.join(repr(line) for line in text.split('\n'))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
64
65
66
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A freshly-created compressor has produced no output chunks yet.
        comp = self.compressor()
        self.assertEqual([], comp.chunks)

    def test_one_nosha_delta(self):
        # The first text has nothing to diff against, so it is stored as a
        # fulltext record: 'f', length byte, then the raw bytes.
        comp = self.compressor()
        text = 'strange\ncommon\n'
        sha1, start_point, end_point, _ = comp.compress(('label',), text,
                                                        None)
        self.assertEqual(sha_string(text), sha1)
        expected_bytes = 'f\x0fstrange\ncommon\n'
        self.assertEqual(expected_bytes, ''.join(comp.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(len(expected_bytes), end_point)

    def test_empty_content(self):
        comp = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = comp.compress(('empty',), '',
                                                           None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, comp.endpoint)
        self.assertEqual([], comp.chunks)
        # Even after adding some content
        comp.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(comp.endpoint > 0)
        sha1, start_point, end_point, kind = comp.compress(('empty2',), '',
                                                           None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        comp = self.compressor()
        text_one = 'strange\ncommon long line\nthat needs a 16 byte match\n'
        text_two = 'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_1, _, _, _ = comp.compress(('label',), text_one, None)
        sha1_2, _, end_point, _ = comp.compress(('newlabel',), text_two, None)
        # Both texts must come back out of the in-progress stream intact.
        self.assertEqual((text_one, sha1_1), comp.extract(('label',)))
        self.assertEqual((text_two, sha1_2), comp.extract(('newlabel',)))

    def test_pop_last(self):
        # pop_last() must restore the chunk stream to exactly the state it
        # was in before the final compress() call.
        comp = self.compressor()
        comp.compress(('key1',), 'some text\nfor the first entry\n', None)
        snapshot = list(comp.chunks)
        comp.compress(('key2',), 'some text\nfor the second entry\n', None)
        comp.pop_last()
        self.assertEqual(snapshot, comp.chunks)
134
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
135
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
136
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the compiled (Pyrex) compressor."""

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        comp = self.compressor()
        comp.compress(('label',),
                      'strange\n'
                      'common very very long line\n'
                      'plus more text\n', None)
        comp.compress(('newlabel',),
                      'common very very long line\n'
                      'plus more text\n'
                      'different\n'
                      'moredifferent\n', None)
        comp.compress(('label3',),
                      'new\n'
                      'common very very long line\n'
                      'plus more text\n'
                      'different\n'
                      'moredifferent\n', None)
        # Three heavily-overlapping texts should compress to roughly half
        # their combined size.
        self.assertAlmostEqual(1.9, comp.ratio(), 1)

    def test_two_nosha_delta(self):
        comp = self.compressor()
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        # Snapshot the fulltext record; the second text must be emitted as a
        # delta appended after it.
        expected = list(comp.chunks)
        sha1_2, start_point, end_point, _ = comp.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected, comp.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        comp = self.compressor()
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = comp.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected = list(comp.chunks)
        sha1_3, start_point, end_point, _ = comp.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected, comp.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)
214
215
216
class TestPythonGroupCompressor(TestGroupCompressor):
    """Byte-exact output tests for the pure-Python compressor."""

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        comp = self.compressor()
        comp.compress(('label',),
                      'strange\n'
                      'common very very long line\n'
                      'plus more text\n', None)
        comp.compress(('newlabel',),
                      'common very very long line\n'
                      'plus more text\n'
                      'different\n'
                      'moredifferent\n', None)
        comp.compress(('label3',),
                      'new\n'
                      'common very very long line\n'
                      'plus more text\n'
                      'different\n'
                      'moredifferent\n', None)
        # Three heavily-overlapping texts should compress to roughly half
        # their combined size.
        self.assertAlmostEqual(1.9, comp.ratio(), 1)

    def test_two_nosha_delta(self):
        comp = self.compressor()
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        # Snapshot the fulltext record; the second text must be emitted as a
        # delta appended after it.
        expected = list(comp.chunks)
        sha1_2, start_point, end_point, _ = comp.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected, comp.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        comp = self.compressor()
        sha1_1, _, _, _ = comp.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = comp.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected = list(comp.chunks)
        sha1_3, start_point, end_point, _ = comp.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected, comp.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)
293
294
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
295
class TestGroupCompressBlock(tests.TestCase):
296
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
297
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping record keys to their text.
        :return: A (locations, block) tuple, where locations maps each key
            to its (start, end) byte range inside the block, and block is a
            GroupCompressBlock that has been round-tripped through
            to_bytes()/from_bytes() so its content starts out compressed.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # labels_deltas values are (start, _, end, _) tuples; keep only the
        # (start, end) byte range for each key.  (The old dead 'start = 0'
        # local was removed: the genexp binds its own 'start'.)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
310
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
311
    def test_from_empty_bytes(self):
        # An empty string is not a valid serialized block at all.
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')
314
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
315
    def test_from_minimal_bytes(self):
        # 'gcb1z' magic with zero compressed and uncompressed lengths is the
        # smallest valid serialized block.
        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        # Parsing is lazy: nothing is decompressed until asked for.
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
325
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
326
    def test_from_invalid(self):
        # Bytes without the 'gcb1' magic header must be rejected.
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')
330
3735.38.4 by John Arbash Meinel
Another disk format change.
331
    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        # Serialized form: magic, compressed length, uncompressed length,
        # then the zlib-compressed payload.
        z_bytes = 'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content),
                                         z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(z_bytes)
        self.assertEqual(z_content, block._z_content)
        # Lengths are recorded at parse time, but decompression is lazy.
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
349
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
350
    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        # The wire form is the 'gcb1z' magic, both lengths, then the
        # zlib-compressed content.
        expected_header = 'gcb1z\n%d\n%d\n' % (gcb._z_content_length,
                                               gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        self.assertEqual(content, zlib.decompress(remaining_bytes))

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)
375
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
376
    def test_partial_decomp(self):
        """_ensure_content(n) decompresses only as much as was requested."""
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we interleave sha hashes with
        # repetitive text to get a mixed payload.
        chunks = []
        for i in range(2048):
            chunk = '%d\nThis is a bit of duplicate text\n' % (i,)
            chunks.append(chunk)
            chunks.append(osutils.sha_string(chunk) + '\n')
        content = ''.join(chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # At least 100 bytes are now available, but not the whole text.
        self.assertTrue(len(block._content) >= 100)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Asking for content we already hold must not extract any more data.
        have = len(block._content)
        block._ensure_content(have - 10)
        self.assertEqual(have, len(block._content))
        # A slightly larger request grows the buffer, still short of the end.
        have += 10
        block._ensure_content(have)
        self.assertTrue(len(block._content) >= have)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Requesting the full length finishes decompression...
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # ...and finalizes the decompressor.
        self.assertIs(None, block._z_content_decompressor)
420
3735.32.11 by John Arbash Meinel
Add tests for the ability to do partial decompression without knowing the final length.
421
    def test_partial_decomp_no_known_length(self):
        """Partial decompression works when _content_length is unknown."""
        chunks = []
        for i in range(2048):
            chunk = '%d\nThis is a bit of duplicate text\n' % (i,)
            chunks.append(chunk)
            chunks.append(osutils.sha_string(chunk) + '\n')
        content = ''.join(chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # At least 100 bytes are decompressed, but not the whole payload.
        self.assertTrue(len(block._content) >= 100)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Asking for content we already hold must not extract any more data.
        have = len(block._content)
        block._ensure_content(have - 10)
        self.assertEqual(have, len(block._content))
        # A slightly larger request grows the buffer, still short of the end.
        have += 10
        block._ensure_content(have)
        self.assertTrue(len(block._content) >= have)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # With no argument, _ensure_content runs to the very end...
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # ...and the decompressor is finalized.
        self.assertIs(None, block._z_content_decompressor)
461
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
462
    def test__dump(self):
        """_dump shows a fulltext record followed by a delta against it."""
        shared = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): shared + '1 unique\n',
                       ('2',): shared + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        expected = [
            ('f', len(key_to_text[('1',)])),
            ('d', 21, len(key_to_text[('2',)]),
             [('c', 2, len(shared)),
              ('i', len('2 extra special\n'), ''),
              ]),
            ]
        self.assertEqual(expected, block._dump())
473
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
474
475
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
    """Base class providing a helper that builds a pack-backed GC VF."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Create a groupcompress VersionedFiles on a transport at *dir*.

        :param create_graph: If True the index tracks a parent graph.
        :param do_cleanup: If True, register pack-group cleanup with
            addCleanup so the files are released at test teardown.
        """
        transport = self.get_transport(dir)
        transport.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)
        vf = factory(transport)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
487
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
488
489
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
    """Tests for the groupcompress VersionedFiles implementation."""

    def make_g_index(self, name, ref_lists=0, nodes=()):
        """Build and return a BTreeGraphIndex named *name*.

        :param nodes: iterable of (key, references, value) tuples to index.
            (Immutable default; a mutable ``[]`` default would be shared
            between calls.)
        """
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """Return an index where 'tip' references an absent 'missing-parent'."""
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def _grouped_stream(self, revision_ids, first_parents=()):
        """Yield chained FulltextContentFactory records for *revision_ids*.

        The texts differ only in the embedded revision id, so consecutive
        records compress into the same group.  Shared helper; several tests
        previously defined an identical local copy.
        """
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                'some content that is\n'
                'identical except for\n'
                'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                # The first record seen from each group carries the block.
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        # a-d share one block object, e-h another, and the two differ.
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            # Mark each block as already 'full enough' so the insert code
            # reuses it rather than recompressing on the fly.
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        """Create a source VF with texts 'a' and 'b'.

        :param a_parent: If True, 'b' records 'a' as its parent; otherwise
            'b' has no parents.  This lets tests create two sources whose
            'b' records carry inconsistent parent details.
        """
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        """Insert two 'b' records whose recorded parents disagree."""
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        # Intercept trace.warning so the emitted message can be checked.
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")
747
748
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
749
class TestLazyGroupCompress(tests.TestCaseWithTransport):
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
750
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
751
    # Sample texts that share a long common tail so they compress well
    # against each other inside one group; key4's text embeds key3's text,
    # so extracting key4 exercises delta references into unextracted data.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
767
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: dict mapping key tuples to text strings.
        :return: (locs, block) where locs maps each key to its (start, end)
            byte range and block has been round-tripped through its
            serialised form, like a block read from disk.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # labels_deltas values are (start, _, end, _) tuples; keep only the
        # byte range.  (A dead 'start = 0' local was removed here - it was
        # never read.)
        locs = {}
        for key, (start, _, end, _) in compressor.labels_deltas.iteritems():
            locs[key] = (start, end)
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
778
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
779
    def add_key_to_manager(self, key, locations, block, manager):
        """Register *key* with *manager* at its recorded (start, end) range.

        *block* is unused here but kept for a uniform helper signature.
        """
        start, end = locations[key]
        manager.add_factory(key, (), start, end)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
782
4634.23.1 by Robert Collins
Cherrypick from bzr.dev: Fix bug 402652: recompress badly packed groups during fetch. (John Arbash Meinel, Robert Collins)
783
    def make_block_and_full_manager(self, texts):
        """Return (block, manager) with every key in *texts* registered."""
        locs, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locs, block, manager)
        return block, manager
789
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
790
    def test_get_fulltexts(self):
        """Records stream back in the order keys were added to the manager."""
        locations, block = self.make_block(self._texts)

        def extract_in_order(key_order):
            # Build a fresh manager with keys registered in key_order and
            # return the keys in the order the stream produced them, checking
            # each fulltext along the way.
            manager = groupcompress._LazyGroupContentManager(block)
            for key in key_order:
                self.add_key_to_manager(key, locations, block, manager)
            seen = []
            for record in manager.get_record_stream():
                seen.append(record.key)
                self.assertEqual(self._texts[record.key],
                                 record.get_bytes_as('fulltext'))
            return seen

        self.assertEqual([('key1',), ('key2',)],
                         extract_in_order([('key1',), ('key2',)]))
        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        self.assertEqual([('key2',), ('key1',)],
                         extract_in_order([('key2',), ('key1',)]))
813
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
814
    def test__wire_bytes_no_keys(self):
        """With no factories registered, _wire_bytes strips the block."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        expected = ('groupcompress-block\n'
                    '8\n' # len(compress(''))
                    '0\n' # len('')
                    '%d\n'# compressed block len
                    '%s'  # zheader
                    '%s'  # block
                    % (len(stripped_block), empty_z_header,
                       stripped_block))
        self.assertEqual(expected, wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
832
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
833
    def test__wire_bytes(self):
        """The wire form is: kind line, three length lines, a zlib-compressed
        header describing the requested keys, then the raw block bytes."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        storage_kind, z_len, h_len, b_len, rest = wire_bytes.split('\n', 4)
        z_len = int(z_len)
        h_len = int(h_len)
        b_len = int(b_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_len)
        self.assertEqual(26, h_len)
        self.assertEqual(len(block_bytes), b_len)
        # The header decompresses to one record per requested key.
        header = zlib.decompress(rest[:z_len])
        self.assertEqual(h_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        # Everything after the header is the block, untouched.
        self.assertEqual(block_bytes, rest[z_len:])
867
868
    def test_from_bytes(self):
        """A manager round-trips through its wire representation."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        # Rebuild a fresh manager purely from the serialized form.
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
885
886
    def test__check_rebuild_no_changes(self):
        # A manager tracking every key in the block has nothing to strip or
        # rebuild, so the block object must be left alone.
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)
890
891
    def test__check_rebuild_only_one(self):
        """Tracking only the first key should trigger a 'strip' rebuild."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        # The rebuild replaces the block with a smaller one.
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # The stripped block must still serve its single remaining entry.
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            expected = self._texts[record.key]
            self.assertEqual(expected, record.get_bytes_as('fulltext'))
905
906
    def test__check_rebuild_middle(self):
        """A small key in the middle of the block should trigger a rebuild."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        # The rebuilt block is a new, smaller object.
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # The rebuilt block still serves the one requested key.
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            expected = self._texts[record.key]
            self.assertEqual(expected, record.get_bytes_as('fulltext'))
4634.23.1 by Robert Collins
Cherrypick from bzr.dev: Fix bug 402652: recompress badly packed groups during fetch. (John Arbash Meinel, Robert Collins)
918
919
    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        content_len = block._content_length
        self.assertFalse(manager.check_is_well_utilized())
        # We can fake 'well utilized' by lowering the recommended minimum
        # size to exactly the block's content length...
        manager._full_enough_block_size = content_len
        self.assertTrue(manager.check_is_well_utilized())
        # ...and one byte above that threshold fails again.
        manager._full_enough_block_size = content_len + 1
        self.assertFalse(manager.check_is_well_utilized())
        # The mixed-block threshold is irrelevant here, since all keys share
        # one prefix and the content is not considered 'mixed'.
        manager._full_enough_mixed_block_size = content_len
        self.assertFalse(manager.check_is_well_utilized())
932
933
    def test_check_is_well_utilized_mixed_keys(self):
        # Keys from two different prefixes ('f1' and 'f2') make the block
        # 'mixed', so the mixed-block threshold applies.
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts = {
            f1k1: self._texts[('key1',)],
            f1k2: self._texts[('key2',)],
            f2k1: self._texts[('key3',)],
            f2k2: self._texts[('key4',)],
            }
        block, manager = self.make_block_and_full_manager(texts)
        content_len = block._content_length
        self.assertFalse(manager.check_is_well_utilized())
        # Lowering the plain threshold to the content length passes...
        manager._full_enough_block_size = content_len
        self.assertTrue(manager.check_is_well_utilized())
        # ...one byte above it fails again...
        manager._full_enough_block_size = content_len + 1
        self.assertFalse(manager.check_is_well_utilized())
        # ...and for mixed content the mixed threshold is honoured too.
        manager._full_enough_mixed_block_size = content_len
        self.assertTrue(manager.check_is_well_utilized())
951
952
    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # The content from key1 and key2 alone is not enough to count as
        # 'complete' utilization.
        self.assertFalse(manager.check_is_well_utilized())
        # Adding key4 pushes us over the bar, since only 75% consumption
        # is required.
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())