# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from .. import (
    config,
    errors,
    osutils,
    tests,
    trace,
    )
from ..bzr import (
    btree_index,
    groupcompress,
    knit,
    index as _mod_index,
    versionedfile,
    )
from ..osutils import sha_string
from .test__groupcompress import compiled_groupcompress_feature
from .scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
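
# load_tests_apply_scenarios is the standard load_tests hook: every test in a
# class that defines a 'scenarios' attribute (here TestAllGroupCompressors)
# is run once per scenario, i.e. against both the Python and the compiled
# (Pyrex) compressor when the latter is available.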


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
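        # (_null_sha1 is simply the sha1 of the empty string, so every empty
        # text maps to the same null record.)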

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
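        # (The copy instruction appears to use the git-style delta encoding:
        # the high bit of '\x91' marks a copy, and its low bits say that one
        # offset byte and one length byte follow.)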
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
244
245
    def test_two_nosha_delta(self):
246
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
247
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
248
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
249
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
250
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
251
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
252
        self.assertEqual(sha_string('common long line\n'
253
                                    'that needs a 16 byte match\n'
254
                                    'different\n'), sha1_2)
255
        expected_lines.extend([
256
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
257
            'd\x0f',
258
            # target length
259
            '\x36',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
260
            # copy the line common
261
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
262
            # add the line different, and the trailing newline
263
            '\x0adifferent\n', # insert 10 bytes
264
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
265
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
266
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib has partial
        # decompression to work with. Most auto-generated data compresses a
        # bit too well, so we mix sha hashes in with the compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
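        # _ensure_content(n) lazily decompresses at least n bytes of the
        # block; asking for the full length decompresses everything and
        # releases the streaming decompressor.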
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib has partial
        # decompression to work with. Most auto-generated data compresses a
        # bit too well, so we mix sha hashes in with the compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content gets all of the required data at once
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
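        # Reading the expectation: the first text comes out as a fulltext
        # ('f') record, the second as a delta ('d') whose instructions copy
        # ('c') the shared prefix and insert ('i') the unique suffix.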


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
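        # make_pack_factory returns a factory function; calling it with the
        # transport builds the groupcompress versioned-files store these
        # tests exercise.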
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
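        # The first record out of each group carries the whole compressed
        # block ('groupcompress-block'); the rest arrive as thin references
        # into that block ('groupcompress-block-ref').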
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the records into another vf with reuse_blocks=False, so
        # the existing groups cannot simply be copied across.
        vf2 = self.make_test_vf(True, dir='target')
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())
741
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
742
    def make_source_with_b(self, a_parent, path):
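        """Create a vf with texts 'a' and 'b'; 'b' gets 'a' as its parent
        only when a_parent is True.
        """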
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
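        """Insert the ('b',) record twice, with parent details that disagree."""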
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              r" \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              r" 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):
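    """Tests for config-driven tuning of GroupCompressVersionedFiles."""
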
    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)


class StubGCVF(object):
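    """A minimal stand-in for a GroupCompressVersionedFiles.

    It exposes only what _BatchingBlockFetcher touches in these tests: the
    _group_cache dict and a _get_blocks() that replays a canned list of
    (read_memo, block) pairs.
    """
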
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):
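    """Tests for _LazyGroupContentManager and its handling of blocks."""
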
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
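        """Register key with manager, using its (start, end) from locations."""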
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
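        # The serialised form is the storage-kind marker plus three length
        # lines (compressed header, raw header, block), then the compressed
        # header bytes and the block bytes, as asserted below.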
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(compress(''))
                         '0\n'  # len('')
                         '%d\n'  # compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
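        # The decompressed header lists, per factory: its key, a parents line
        # (empty here), and the start and end offsets of its content within
        # the uncompressed block.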
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n'  # start offset
                             '%d\n'  # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate the compressor settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if (groupcompress.GroupCompressor
                is not groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):
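    """Tests for the tuple-like _GCBuildDetails build-details object."""
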
    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1])  # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3])  # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))