/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
16
17
"""Tests for group compression."""
18
19
import zlib
20
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
21
from .. import (
5755.2.4 by John Arbash Meinel
Expose the max_entries_per_source into GroupCompressVersionedFiles
22
    config,
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
23
    errors,
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
24
    osutils,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
25
    tests,
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
26
    trace,
6670.4.1 by Jelmer Vernooij
Update imports.
27
    )
28
from ..bzr import (
29
    btree_index,
30
    groupcompress,
31
    index as _mod_index,
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
32
    versionedfile,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
33
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
34
from ..osutils import sha_string
35
from .test__groupcompress import compiled_groupcompress_feature
36
from .scenarios import load_tests_apply_scenarios
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
37
38
39
def group_compress_implementation_scenarios():
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
40
    scenarios = [
41
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
42
        ]
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
43
    if compiled_groupcompress_feature.available():
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
44
        scenarios.append(('C',
45
            {'compressor': groupcompress.PyrexGroupCompressor}))
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
46
    return scenarios
47
48
49
load_tests = load_tests_apply_scenarios
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
50
51
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
52
class TestGroupCompressor(tests.TestCase):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
53
54
    def _chunks_to_repr_lines(self, chunks):
55
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
56
57
    def assertEqualDiffEncoded(self, expected, actual):
58
        """Compare the actual content to the expected content.
59
60
        :param expected: A group of chunks that we expect to see
61
        :param actual: The measured 'chunks'
62
63
        We will transform the chunks back into lines, and then run 'repr()'
64
        over them to handle non-ascii characters.
65
        """
66
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
67
                             self._chunks_to_repr_lines(actual))
68
69
70
class TestAllGroupCompressors(TestGroupCompressor):
0.17.2 by Robert Collins
Core proof of concept working.
71
    """Tests for GroupCompressor"""
72
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
73
    scenarios = group_compress_implementation_scenarios()
74
    compressor = None # Set by scenario
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
75
0.17.2 by Robert Collins
Core proof of concept working.
76
    def test_empty_delta(self):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
77
        compressor = self.compressor()
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
78
        self.assertEqual([], compressor.chunks)
0.17.2 by Robert Collins
Core proof of concept working.
79
80
    def test_one_nosha_delta(self):
81
        # diff against NUKK
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
82
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
83
        sha1, start_point, end_point, _ = compressor.compress(('label',),
0.23.58 by John Arbash Meinel
fix up the failing tests.
84
            'strange\ncommon\n', None)
85
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
86
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
87
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
88
        self.assertEqual(0, start_point)
0.17.2 by Robert Collins
Core proof of concept working.
89
        self.assertEqual(sum(map(len, expected_lines)), end_point)
90
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
91
    def test_empty_content(self):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
92
        compressor = self.compressor()
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
93
        # Adding empty bytes should return the 'null' record
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
94
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
95
                                                                 '', None)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
96
        self.assertEqual(0, start_point)
97
        self.assertEqual(0, end_point)
98
        self.assertEqual('fulltext', kind)
99
        self.assertEqual(groupcompress._null_sha1, sha1)
100
        self.assertEqual(0, compressor.endpoint)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
101
        self.assertEqual([], compressor.chunks)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
102
        # Even after adding some content
103
        compressor.compress(('content',), 'some\nbytes\n', None)
104
        self.assertTrue(compressor.endpoint > 0)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
105
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
106
                                                                 '', None)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
107
        self.assertEqual(0, start_point)
108
        self.assertEqual(0, end_point)
109
        self.assertEqual('fulltext', kind)
110
        self.assertEqual(groupcompress._null_sha1, sha1)
111
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
112
    def test_extract_from_compressor(self):
113
        # Knit fetching will try to reconstruct texts locally which results in
114
        # reading something that is in the compressor stream already.
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
115
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
116
        sha1_1, _, _, _ = compressor.compress(('label',),
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
117
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
118
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
119
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
120
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
121
        # get the first out
0.25.8 by John Arbash Meinel
Fix up the tests. Mostly it was just changing things to
122
        self.assertEqual(('strange\ncommon long line\n'
123
                          'that needs a 16 byte match\n', sha1_1),
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
124
                         compressor.extract(('label',)))
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
125
        # and the second
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
126
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
127
                          'different\n', sha1_2),
128
                         compressor.extract(('newlabel',)))
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
129
4241.17.2 by John Arbash Meinel
PythonGroupCompressor needs to support pop_last() properly.
130
    def test_pop_last(self):
131
        compressor = self.compressor()
132
        _, _, _, _ = compressor.compress(('key1',),
133
            'some text\nfor the first entry\n', None)
134
        expected_lines = list(compressor.chunks)
135
        _, _, _, _ = compressor.compress(('key2',),
136
            'some text\nfor the second entry\n', None)
137
        compressor.pop_last()
138
        self.assertEqual(expected_lines, compressor.chunks)
139
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
140
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
141
class TestPyrexGroupCompressor(TestGroupCompressor):
142
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
143
    _test_needs_features = [compiled_groupcompress_feature]
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
144
    compressor = groupcompress.PyrexGroupCompressor
145
146
    def test_stats(self):
147
        compressor = self.compressor()
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
148
        compressor.compress(('label',),
149
                            'strange\n'
150
                            'common very very long line\n'
151
                            'plus more text\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
152
        compressor.compress(('newlabel',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
153
                            'common very very long line\n'
154
                            'plus more text\n'
155
                            'different\n'
156
                            'moredifferent\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
157
        compressor.compress(('label3',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
158
                            'new\n'
159
                            'common very very long line\n'
160
                            'plus more text\n'
161
                            'different\n'
162
                            'moredifferent\n', None)
163
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
164
165
    def test_two_nosha_delta(self):
166
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
167
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
168
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
169
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
170
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
171
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
172
        self.assertEqual(sha_string('common long line\n'
173
                                    'that needs a 16 byte match\n'
174
                                    'different\n'), sha1_2)
175
        expected_lines.extend([
176
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
177
            'd\x0f',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
178
            # source and target length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
179
            '\x36',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
180
            # copy the line common
181
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
182
            # add the line different, and the trailing newline
183
            '\x0adifferent\n', # insert 10 bytes
184
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
185
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
186
        self.assertEqual(sum(map(len, expected_lines)), end_point)
187
188
    def test_three_nosha_delta(self):
189
        # The first interesting test: make a change that should use lines from
190
        # both parents.
191
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
192
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
193
            'strange\ncommon very very long line\nwith some extra text\n', None)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
194
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
195
            'different\nmoredifferent\nand then some more\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
196
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
197
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
198
            'new\ncommon very very long line\nwith some extra text\n'
199
            'different\nmoredifferent\nand then some more\n',
200
            None)
201
        self.assertEqual(
202
            sha_string('new\ncommon very very long line\nwith some extra text\n'
203
                       'different\nmoredifferent\nand then some more\n'),
204
            sha1_3)
205
        expected_lines.extend([
206
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
207
            'd\x0b',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
208
            # source and target length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
209
            '\x5f'
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
210
            # insert new
211
            '\x03new',
212
            # Copy of first parent 'common' range
213
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
214
            # Copy of second parent 'different' range
215
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
216
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
217
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
218
        self.assertEqual(sum(map(len, expected_lines)), end_point)
219
220
221
class TestPythonGroupCompressor(TestGroupCompressor):
222
223
    compressor = groupcompress.PythonGroupCompressor
224
225
    def test_stats(self):
226
        compressor = self.compressor()
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
227
        compressor.compress(('label',),
228
                            'strange\n'
229
                            'common very very long line\n'
230
                            'plus more text\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
231
        compressor.compress(('newlabel',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
232
                            'common very very long line\n'
233
                            'plus more text\n'
234
                            'different\n'
235
                            'moredifferent\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
236
        compressor.compress(('label3',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
237
                            'new\n'
238
                            'common very very long line\n'
239
                            'plus more text\n'
240
                            'different\n'
241
                            'moredifferent\n', None)
242
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
243
244
    def test_two_nosha_delta(self):
245
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
246
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
247
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
248
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
249
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
250
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
251
        self.assertEqual(sha_string('common long line\n'
252
                                    'that needs a 16 byte match\n'
253
                                    'different\n'), sha1_2)
254
        expected_lines.extend([
255
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
256
            'd\x0f',
257
            # target length
258
            '\x36',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
259
            # copy the line common
260
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
261
            # add the line different, and the trailing newline
262
            '\x0adifferent\n', # insert 10 bytes
263
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
264
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
265
        self.assertEqual(sum(map(len, expected_lines)), end_point)
266
267
    def test_three_nosha_delta(self):
268
        # The first interesting test: make a change that should use lines from
269
        # both parents.
270
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
271
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
272
            'strange\ncommon very very long line\nwith some extra text\n', None)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
273
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
274
            'different\nmoredifferent\nand then some more\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
275
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
276
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
277
            'new\ncommon very very long line\nwith some extra text\n'
278
            'different\nmoredifferent\nand then some more\n',
279
            None)
280
        self.assertEqual(
281
            sha_string('new\ncommon very very long line\nwith some extra text\n'
282
                       'different\nmoredifferent\nand then some more\n'),
283
            sha1_3)
284
        expected_lines.extend([
285
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
286
            'd\x0c',
287
            # target length
288
            '\x5f'
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
289
            # insert new
290
            '\x04new\n',
291
            # Copy of first parent 'common' range
292
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
293
            # Copy of second parent 'different' range
294
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
295
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
296
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
297
        self.assertEqual(sum(map(len, expected_lines)), end_point)
298
299
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
300
class TestGroupCompressBlock(tests.TestCase):
301
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
302
    def make_block(self, key_to_text):
303
        """Create a GroupCompressBlock, filling it with the given texts."""
304
        compressor = groupcompress.GroupCompressor()
305
        start = 0
306
        for key in sorted(key_to_text):
307
            compressor.compress(key, key_to_text[key], None)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
308
        locs = dict((key, (start, end)) for key, (start, _, end, _)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
309
                    in compressor.labels_deltas.items())
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
310
        block = compressor.flush()
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
311
        raw_bytes = block.to_bytes()
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
312
        # Go through from_bytes(to_bytes()) so that we start with a compressed
313
        # content object
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
314
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
315
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
316
    def test_from_empty_bytes(self):
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
317
        self.assertRaises(ValueError,
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
318
                          groupcompress.GroupCompressBlock.from_bytes, '')
319
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
320
    def test_from_minimal_bytes(self):
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
321
        block = groupcompress.GroupCompressBlock.from_bytes(
3735.38.4 by John Arbash Meinel
Another disk format change.
322
            'gcb1z\n0\n0\n')
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
323
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
324
        self.assertIs(None, block._content)
325
        self.assertEqual('', block._z_content)
326
        block._ensure_content()
3735.32.5 by John Arbash Meinel
Change the parsing code to start out just holding the compressed bytes.
327
        self.assertEqual('', block._content)
3735.32.27 by John Arbash Meinel
Have _LazyGroupContentManager pre-extract everything it holds.
328
        self.assertEqual('', block._z_content)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
329
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
330
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
331
    def test_from_invalid(self):
332
        self.assertRaises(ValueError,
333
                          groupcompress.GroupCompressBlock.from_bytes,
334
                          'this is not a valid header')
335
3735.38.4 by John Arbash Meinel
Another disk format change.
336
    def test_from_bytes(self):
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
337
        content = ('a tiny bit of content\n')
338
        z_content = zlib.compress(content)
339
        z_bytes = (
340
            'gcb1z\n' # group compress block v1 plain
341
            '%d\n' # Length of compressed content
342
            '%d\n' # Length of uncompressed content
343
            '%s'   # Compressed content
3735.38.4 by John Arbash Meinel
Another disk format change.
344
            ) % (len(z_content), len(content), z_content)
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
345
        block = groupcompress.GroupCompressBlock.from_bytes(
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
346
            z_bytes)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
347
        self.assertEqual(z_content, block._z_content)
348
        self.assertIs(None, block._content)
3735.38.4 by John Arbash Meinel
Another disk format change.
349
        self.assertEqual(len(z_content), block._z_content_length)
350
        self.assertEqual(len(content), block._content_length)
3735.32.10 by John Arbash Meinel
test that we support reading from the gc blocks that didn't have their lengths.
351
        block._ensure_content()
3735.32.27 by John Arbash Meinel
Have _LazyGroupContentManager pre-extract everything it holds.
352
        self.assertEqual(z_content, block._z_content)
3735.32.10 by John Arbash Meinel
test that we support reading from the gc blocks that didn't have their lengths.
353
        self.assertEqual(content, block._content)
354
5439.2.1 by John Arbash Meinel
Change GroupCompressBlock to work in self._z_compress_chunks
355
    def test_to_chunks(self):
356
        content_chunks = ['this is some content\n',
357
                          'this content will be compressed\n']
358
        content_len = sum(map(len, content_chunks))
359
        content = ''.join(content_chunks)
360
        gcb = groupcompress.GroupCompressBlock()
361
        gcb.set_chunked_content(content_chunks, content_len)
362
        total_len, block_chunks = gcb.to_chunks()
363
        block_bytes = ''.join(block_chunks)
364
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
365
        self.assertEqual(total_len, len(block_bytes))
366
        self.assertEqual(gcb._content_length, content_len)
367
        expected_header =('gcb1z\n' # group compress block v1 zlib
368
                          '%d\n' # Length of compressed content
369
                          '%d\n' # Length of uncompressed content
370
                         ) % (gcb._z_content_length, gcb._content_length)
371
        # The first chunk should be the header chunk. It is small, fixed size,
372
        # and there is no compelling reason to split it up
373
        self.assertEqual(expected_header, block_chunks[0])
374
        self.assertStartsWith(block_bytes, expected_header)
375
        remaining_bytes = block_bytes[len(expected_header):]
376
        raw_bytes = zlib.decompress(remaining_bytes)
377
        self.assertEqual(content, raw_bytes)
378
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
379
    def test_to_bytes(self):
        """to_bytes() serialises as header + zlib stream, and the chunked
        setter produces byte-identical output to set_content()."""
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        # NOTE: local renamed from 'bytes' to avoid shadowing the builtin.
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)
404
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
405
    def test_partial_decomp(self):
        """_ensure_content(n) decompresses only as much as requested and
        finalizes the decompressor once everything has been extracted."""
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
449
4744.2.3 by John Arbash Meinel
change the GroupcompressBlock code a bit.
450
    def test__ensure_all_content(self):
        """A single _ensure_content() for the full length decompresses
        everything and releases the streaming decompressor."""
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)
477
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
478
    def test__dump(self):
        """_dump() renders the block as ('f', len) fulltext and
        ('d', ...) delta entries with copy/insert instructions."""
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
489
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
490
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
491
class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):
    """Base class providing a helper to build a groupcompress VF on a
    memory transport."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Create a groupcompress VersionedFiles on transport *dir*.

        :param create_graph: passed through as the pack factory's graph flag.
        :param do_cleanup: when True, register cleanup_pack_group for teardown.
        """
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
504
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
505
506
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=None):
        """Build and reopen a BTree graph index named *name* on the
        test transport. (nodes default fixed: was a mutable [] default.)"""
        if nodes is None:
            nodes = []
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """Index where 'tip' references a 'missing-parent' that has no node."""
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def _grouped_stream(self, revision_ids, first_parents=()):
        """Yield FulltextContentFactory records for *revision_ids*, each
        record's parent being the previous one (a linear chain)."""
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                'some content that is\n'
                'identical except for\n'
                'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        """Records expose the default compressor settings by default."""
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        """Records pick up a VF's customized _max_bytes_to_index."""
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def test_insert_record_stream_reuses_blocks(self):
        """Copying groupcompress records preserves the source blocks when
        they are considered full enough."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                # Pretend each block is already "full enough" so the target
                # does not rebuild it.
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        """Under-utilized blocks get recombined during insert_record_stream."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        """reuse_blocks=False forces everything into a single new block."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        """Scanning an unvalidated index records externally-missing parents."""
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        """add_records() tracks parents that are not present in the index."""
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        """Make a VF at *path* with 'a' and 'b'; 'b' has 'a' as parent only
        when a_parent is True."""
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        """Insert the same key twice with differing details into one target."""
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        """With inconsistency_fatal=True, a redundant insert raises."""
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        """clear_cache() empties the group cache populated by reads."""
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
791
792
5755.2.4 by John Arbash Meinel
Expose the max_entries_per_source into GroupCompressVersionedFiles
793
class TestGroupCompressConfig(tests.TestCaseWithTransport):
    """Tests for the bzr.groupcompress.max_bytes_to_index config option."""

    def make_test_vf(self):
        """Create a graph-enabled groupcompress VF on the test transport."""
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        """Without config, the class default limit is used."""
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        """A valid config value overrides the default."""
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        """A non-integer config value falls back to the default."""
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
5755.2.4 by John Arbash Meinel
Expose the max_entries_per_source into GroupCompressVersionedFiles
834
835
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
836
class StubGCVF(object):
    """Minimal stand-in for a groupcompress VF.

    Provides only the attributes the batching fetcher touches: an empty
    ``_group_cache`` and a ``_get_blocks`` that replays canned results.
    """

    def __init__(self, canned_get_blocks=None):
        # Cache is always empty; tests populate it directly when needed.
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        # Ignores read_memos and just replays the canned answers.
        return iter(self._canned_get_blocks)
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
842
    
843
844
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        # The second add_key() reuses the already-queued memo, so the batch
        # size does not grow.
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        # Without full_flush the final (possibly still-growing) block is
        # withheld...
        self.assertEqual([], list(batcher.yield_factories()))
        # ...and released once full_flush=True is passed.
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
class TestLazyGroupCompress(tests.TestCaseWithTransport):
    """Tests for _LazyGroupContentManager and its wire format."""

    # Fixture texts: key1/key2 share compressible content, key4 delta-refers
    # to key3's bytes so that extracting key4 alone exercises rebuilds.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :return: (locations, block) where locations maps key -> (start, end)
            offsets within the block.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Round-trip through the serialized form so tests see a block in the
        # same state as one read from disk.
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        """Register key's (start, end) range from locations with manager."""
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        """Build a block from texts and a manager holding every key."""
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        # All keys used => the block is kept as-is.
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        # BUGFIX: the original guard used
        #   isinstance(groupcompress.GroupCompressor, PyrexGroupCompressor)
        # but GroupCompressor is bound to a *class*, not an instance, so the
        # isinstance check was always False and the test never ran. Compare
        # the class objects by identity instead.
        if groupcompress.GroupCompressor is not \
                groupcompress.PyrexGroupCompressor:
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
class Test_GCBuildDetails(tests.TestCase):
    """Tests for the tuple-compatible _GCBuildDetails record."""

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))