# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    config,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios
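

# Test multiplication is handed to ``load_tests_apply_scenarios`` below:
# every test in a class that defines a ``scenarios`` attribute is run once
# per scenario, with the scenario's attributes (here, the ``compressor``
# class) set on the test instance.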
load_tests = load_tests_apply_scenarios


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
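        # A decoding note (inferred from the expected bytes above, not from
        # a separate spec): a fulltext record is the kind byte 'f' followed
        # by the content length as a base-128 varint -- '\x0f' is 15, the
        # length of 'strange\ncommon\n' -- followed by the raw text.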

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))
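        # The texts above share a long run on purpose: as their own wording
        # suggests, the delta matcher only emits copy instructions for
        # matches of at least 16 bytes, so shorter overlaps would simply be
        # inserted literally.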

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
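        # A note on the copy instruction '\x91\x0a\x2c' (an inference from
        # the inline comments; the bit layout matches git-style binary
        # deltas): a command byte with the high bit set is a copy, its low
        # four bits say how many offset bytes follow and bits 4-6 how many
        # length bytes, so 0x91 reads one offset byte (0x0a) and one
        # length byte (0x2c).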

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
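        # The expected bytecodes here differ slightly from the Pyrex
        # version (e.g. '\x04new\n' vs. '\x03new', and different copy
        # offsets): the two implementations can pick different but
        # equivalent delta instructions, which is presumably why these
        # byte-exact tests live per-implementation rather than in
        # TestAllGroupCompressors.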


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
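        # To summarize the wire format exercised above: a block is the
        # magic 'gcb1z\n', the compressed length and the uncompressed
        # length as decimal ASCII (each '\n'-terminated), then the zlib
        # stream. Parsing stays lazy: from_bytes() records the lengths but
        # leaves _content as None until _ensure_content() decompresses it.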

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
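        # The partial reads above presumably ride on zlib's incremental
        # decompressobj (held in the _z_content_decompressor attribute):
        # _ensure_content(n) feeds it only enough compressed input to reach
        # n decompressed bytes, and drops it once the output is complete.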

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
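        # Reading _dump()'s output (as asserted above): ('f', length) is a
        # fulltext record; a delta record carries two lengths (apparently
        # the encoded delta size, 21 here, and the expanded content size)
        # plus its instructions, where ('c', offset, length) is a copy and
        # ('i', length, text) is an insert with the text elided.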


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index
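        # Note: the node values like '2 78 2 10' are opaque index payloads
        # (presumably byte offsets and lengths into a group block); these
        # tests only exercise the reference lists, so the exact numbers do
        # not matter here.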

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_entries_per_source_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = vf.get_record_stream([('a',)], 'unordered', True).next()
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
                         record._manager._get_max_entries_per_source())

    def test_get_record_stream_accesses_max_entries_per_source_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_entries_per_source = 1234
        record = vf.get_record_stream([('a',)], 'unordered', True).next()
        self.assertEqual(1234, record._manager._get_max_entries_per_source())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
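        # On the storage kinds checked above: the first record served from
        # a group carries the whole compressed block
        # ('groupcompress-block'), and later records from the same group
        # are lightweight references to it ('groupcompress-block-ref') --
        # which is what lets a target repository reuse the block verbatim
        # instead of recompressing each text.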

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())
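        # With track_external_parent_refs=True the index remembers every
        # parent key it has seen referenced but never added, so
        # get_missing_parents() can report keys living outside this index
        # (presumably used during fetch to detect incomplete streams).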
737
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
738
    def make_source_with_b(self, a_parent, path):
739
        source = self.make_test_vf(True, dir=path)
740
        source.add_lines(('a',), (), ['lines\n'])
741
        if a_parent:
742
            b_parents = (('a',),)
743
        else:
744
            b_parents = ()
745
        source.add_lines(('b',), b_parents, ['lines\n'])
746
        return source
747
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
748
    def do_inconsistent_inserts(self, inconsistency_fatal):
749
        target = self.make_test_vf(True, dir='target',
750
                                   inconsistency_fatal=inconsistency_fatal)
751
        for x in range(2):
752
            source = self.make_source_with_b(x == 1, 'source%s' % x)
753
            target.insert_record_stream(source.get_record_stream(
754
                [('b',)], 'unordered', False))
755
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
756
    def test_inconsistent_redundant_inserts_warn(self):
4465.2.2 by Aaron Bentley
Add test that duplicates are skipped.
757
        """Should not insert a record that is already present."""
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
758
        warnings = []
759
        def warning(template, args):
760
            warnings.append(template % args)
761
        _trace_warning = trace.warning
762
        trace.warning = warning
763
        try:
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
764
            self.do_inconsistent_inserts(inconsistency_fatal=False)
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
765
        finally:
766
            trace.warning = _trace_warning
767
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
768
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
769
                         warnings)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
770
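The try/finally dance above can also be written with TestCase.overrideAttr,
which registers a cleanup that restores the attribute automatically. A sketch
of the equivalent test body, assuming overrideAttr is available in this
version of the test framework:

    warnings = []
    def warning(template, args):
        warnings.append(template % args)
    self.overrideAttr(trace, 'warning', warning)
    self.do_inconsistent_inserts(inconsistency_fatal=False)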
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
771
    def test_inconsistent_redundant_inserts_raises(self):
772
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
773
                              inconsistency_fatal=True)
774
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
775
                              " in add_records:"
776
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
777
                              " 0 8', \(\(\('a',\),\),\)\)")
778
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
779
    def test_clear_cache(self):
780
        vf = self.make_source_with_b(True, 'source')
781
        vf.writer.end()
782
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
783
                                           True):
784
            pass
785
        self.assertTrue(len(vf._group_cache) > 0)
786
        vf.clear_cache()
787
        self.assertEqual(0, len(vf._group_cache))
788
789
5755.2.4 by John Arbash Meinel
Expose the max_entries_per_source into GroupCompressVersionedFiles
790
class TestGroupCompressConfig(tests.TestCaseWithTransport):
791
792
    def make_test_vf(self):
793
        t = self.get_transport('.')
794
        t.ensure_base()
795
        factory = groupcompress.make_pack_factory(graph=True,
796
            delta=False, keylength=1, inconsistency_fatal=True)
797
        vf = factory(t)
798
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
799
        return vf
800
801
    def test_max_entries_per_source_default(self):
802
        vf = self.make_test_vf()
803
        gc = vf._make_group_compressor()
804
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
805
                         vf._max_entries_per_source)
806
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
807
            self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
808
                             gc._delta_index._max_entries_per_source)
809
810
    def test_max_entries_per_source_in_config(self):
811
        c = config.GlobalConfig()
812
        c.set_user_option('bzr.groupcompress.max_entries_per_source', '10000')
813
        vf = self.make_test_vf()
814
        gc = vf._make_group_compressor()
815
        self.assertEqual(10000, vf._max_entries_per_source)
816
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
817
            self.assertEqual(10000, gc._delta_index._max_entries_per_source)
818
819
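For reference, GlobalConfig is backed by the user's bazaar.conf, so the option
set above corresponds to a plain config entry like the following (illustrative
value only):

    [DEFAULT]
    bzr.groupcompress.max_entries_per_source = 10000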
    def test_max_entries_per_source_bad_config(self):
820
        c = config.GlobalConfig()
821
        c.set_user_option('bzr.groupcompress.max_entries_per_source', 'boogah')
822
        vf = self.make_test_vf()
823
        # TODO: This triggers a warning; we might want to trap it and make
824
        #       sure it is readable.
825
        gc = vf._make_group_compressor()
826
        self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
827
                         vf._max_entries_per_source)
828
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
829
            self.assertEqual(vf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
830
                             gc._delta_index._max_entries_per_source)
831
832
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
833
class StubGCVF(object):
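    """Minimal stand-in for a GroupCompressVersionedFiles.

    Only the two attributes that _BatchingBlockFetcher needs here are
    provided: a _group_cache dict and a _get_blocks method.
    """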
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
834
    def __init__(self, canned_get_blocks=None):
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
835
        self._group_cache = {}
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
836
        self._canned_get_blocks = canned_get_blocks or []
837
    def _get_blocks(self, read_memos):
838
        return iter(self._canned_get_blocks)
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
839
840
841
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
842
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
843
844
    def test_add_key_new_read_memo(self):
845
        """Adding a key with an uncached read_memo new to this batch adds that
846
        read_memo to the list of memos to fetch.
847
        """
848
        # locations are: index_memo, ignored, parents, ignored
849
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
850
        # and (idx, offset, size) is known as the 'read_memo', identifying the
851
        # raw bytes needed.
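        # For example (values invented for illustration): an index_memo of
        # ('fake index', 100, 50, 0, 5) has the read_memo
        # ('fake index', 100, 50), i.e. read_memo == index_memo[0:3].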
852
        read_memo = ('fake index', 100, 50)
853
        locations = {
854
            ('key',): (read_memo + (None, None), None, None, None)}
855
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
856
        total_size = batcher.add_key(('key',))
857
        self.assertEqual(50, total_size)
858
        self.assertEqual([('key',)], batcher.keys)
859
        self.assertEqual([read_memo], batcher.memos_to_get)
860
861
    def test_add_key_duplicate_read_memo(self):
862
        """read_memos that occur multiple times in a batch will only be fetched
863
        once.
864
        """
865
        read_memo = ('fake index', 100, 50)
866
        # Two keys, both sharing the same read memo (but different overall
867
        # index_memos).
868
        locations = {
869
            ('key1',): (read_memo + (0, 1), None, None, None),
870
            ('key2',): (read_memo + (1, 2), None, None, None)}
871
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
872
        total_size = batcher.add_key(('key1',))
873
        total_size = batcher.add_key(('key2',))
874
        self.assertEqual(50, total_size)
875
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
876
        self.assertEqual([read_memo], batcher.memos_to_get)
877
878
    def test_add_key_cached_read_memo(self):
879
        """Adding a key with a cached read_memo will not cause that read_memo
880
        to be added to the list to fetch.
881
        """
882
        read_memo = ('fake index', 100, 50)
883
        gcvf = StubGCVF()
884
        gcvf._group_cache[read_memo] = 'fake block'
885
        locations = {
886
            ('key',): (read_memo + (None, None), None, None, None)}
887
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
888
        total_size = batcher.add_key(('key',))
889
        self.assertEqual(0, total_size)
890
        self.assertEqual([('key',)], batcher.keys)
891
        self.assertEqual([], batcher.memos_to_get)
892
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
893
    def test_yield_factories_empty(self):
894
        """An empty batch yields no factories."""
895
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
896
        self.assertEqual([], list(batcher.yield_factories()))
897
898
    def test_yield_factories_calls_get_blocks(self):
4634.3.22 by Andrew Bennetts
Fix docstring.
899
        """Uncached memos are retrieved via get_blocks."""
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
900
        read_memo1 = ('fake index', 100, 50)
901
        read_memo2 = ('fake index', 150, 40)
902
        gcvf = StubGCVF(
903
            canned_get_blocks=[
904
                (read_memo1, groupcompress.GroupCompressBlock()),
905
                (read_memo2, groupcompress.GroupCompressBlock())])
906
        locations = {
907
            ('key1',): (read_memo1 + (None, None), None, None, None),
908
            ('key2',): (read_memo2 + (None, None), None, None, None)}
909
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
910
        batcher.add_key(('key1',))
911
        batcher.add_key(('key2',))
912
        factories = list(batcher.yield_factories(full_flush=True))
913
        self.assertLength(2, factories)
914
        keys = [f.key for f in factories]
915
        kinds = [f.storage_kind for f in factories]
916
        self.assertEqual([('key1',), ('key2',)], keys)
917
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
918
919
    def test_yield_factories_flushing(self):
920
        """yield_factories holds back on yielding results from the final block
921
        unless passed full_flush=True.
922
        """
923
        fake_block = groupcompress.GroupCompressBlock()
924
        read_memo = ('fake index', 100, 50)
925
        gcvf = StubGCVF()
926
        gcvf._group_cache[read_memo] = fake_block
927
        locations = {
928
            ('key',): (read_memo + (None, None), None, None, None)}
929
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
930
        batcher.add_key(('key',))
931
        self.assertEqual([], list(batcher.yield_factories()))
932
        factories = list(batcher.yield_factories(full_flush=True))
933
        self.assertLength(1, factories)
934
        self.assertEqual(('key',), factories[0].key)
935
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
936
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
937
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
938
class TestLazyGroupCompress(tests.TestCaseWithTransport):
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
939
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
940
    _texts = {
941
        ('key1',): "this is a text\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
942
                   "with a reasonable amount of compressible bytes\n"
943
                   "which can be shared between various other texts\n",
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
944
        ('key2',): "another text\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
945
                   "with a reasonable amount of compressible bytes\n"
946
                   "which can be shared between various other texts\n",
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
947
        ('key3',): "yet another text which won't be extracted\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
948
                   "with a reasonable amount of compressible bytes\n"
949
                   "which can be shared between various other texts\n",
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
950
        ('key4',): "this will be extracted\n"
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
951
                   "but references most of its bytes from\n"
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
952
                   "yet another text which won't be extracted\n"
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
953
                   "with a reasonable amount of compressible bytes\n"
954
                   "which can be shared between various other texts\n",
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
955
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
956
    def make_block(self, key_to_text):
957
        """Create a GroupCompressBlock, filling it with the given texts."""
958
        compressor = groupcompress.GroupCompressor()
959
        start = 0
960
        for key in sorted(key_to_text):
961
            compressor.compress(key, key_to_text[key], None)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
962
        locs = dict((key, (start, end)) for key, (start, _, end, _)
963
                    in compressor.labels_deltas.iteritems())
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
964
        block = compressor.flush()
965
        raw_bytes = block.to_bytes()
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
966
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
967
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
968
    def add_key_to_manager(self, key, locations, block, manager):
969
        start, end = locations[key]
970
        manager.add_factory(key, (), start, end)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
971
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
972
    def make_block_and_full_manager(self, texts):
973
        locations, block = self.make_block(texts)
974
        manager = groupcompress._LazyGroupContentManager(block)
975
        for key in sorted(texts):
976
            self.add_key_to_manager(key, locations, block, manager)
977
        return block, manager
978
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
979
    def test_get_fulltexts(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
980
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
981
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
982
        self.add_key_to_manager(('key1',), locations, block, manager)
983
        self.add_key_to_manager(('key2',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
984
        result_order = []
985
        for record in manager.get_record_stream():
986
            result_order.append(record.key)
987
            text = self._texts[record.key]
988
            self.assertEqual(text, record.get_bytes_as('fulltext'))
989
        self.assertEqual([('key1',), ('key2',)], result_order)
990
991
        # If we build the manager in the opposite order, we should get them
992
        # back in the opposite order
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
993
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
994
        self.add_key_to_manager(('key2',), locations, block, manager)
995
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
996
        result_order = []
997
        for record in manager.get_record_stream():
998
            result_order.append(record.key)
999
            text = self._texts[record.key]
1000
            self.assertEqual(text, record.get_bytes_as('fulltext'))
1001
        self.assertEqual([('key2',), ('key1',)], result_order)
1002
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1003
    def test__wire_bytes_no_keys(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1004
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1005
        manager = groupcompress._LazyGroupContentManager(block)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1006
        wire_bytes = manager._wire_bytes()
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1007
        block_length = len(block.to_bytes())
3735.32.24 by John Arbash Meinel
_wire_bytes() now strips groups as necessary, as does _insert_record_stream
1008
        # We should have triggered a strip, since we aren't using any content
1009
        stripped_block = manager._block.to_bytes()
1010
        self.assertTrue(block_length > len(stripped_block))
1011
        empty_z_header = zlib.compress('')
1012
        self.assertEqual('groupcompress-block\n'
1013
                         '8\n' # len(compress(''))
1014
                         '0\n' # len('')
1015
                         '%d\n'# compressed block len
1016
                         '%s'  # zheader
1017
                         '%s'  # block
1018
                         % (len(stripped_block), empty_z_header,
1019
                            stripped_block),
1020
                         wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1021
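As an aside, the framing asserted above (and picked apart by hand in the next
test) can be summarised as a standalone sketch; the function name is invented
here:

    import zlib

    def parse_gc_wire_bytes(wire_bytes):
        # Storage kind plus three decimal lengths, newline separated,
        # followed by the compressed header and then the block itself.
        kind, z_len, h_len, b_len, rest = wire_bytes.split('\n', 4)
        z_len, h_len, b_len = int(z_len), int(h_len), int(b_len)
        z_header, block = rest[:z_len], rest[z_len:]
        header = zlib.decompress(z_header)
        assert len(header) == h_len and len(block) == b_len
        return kind, header, block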
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
1022
    def test__wire_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1023
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1024
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1025
        self.add_key_to_manager(('key1',), locations, block, manager)
1026
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1027
        block_bytes = block.to_bytes()
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1028
        wire_bytes = manager._wire_bytes()
1029
        (storage_kind, z_header_len, header_len,
1030
         block_len, rest) = wire_bytes.split('\n', 4)
1031
        z_header_len = int(z_header_len)
1032
        header_len = int(header_len)
1033
        block_len = int(block_len)
1034
        self.assertEqual('groupcompress-block', storage_kind)
4665.3.8 by John Arbash Meinel
Of course, when you change the content, it can effect the stored wire bytes slightly.
1035
        self.assertEqual(34, z_header_len)
1036
        self.assertEqual(26, header_len)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1037
        self.assertEqual(len(block_bytes), block_len)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1038
        z_header = rest[:z_header_len]
1039
        header = zlib.decompress(z_header)
1040
        self.assertEqual(header_len, len(header))
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1041
        entry1 = locations[('key1',)]
1042
        entry4 = locations[('key4',)]
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1043
        self.assertEqualDiff('key1\n'
1044
                             '\n'  # no parents
1045
                             '%d\n' # start offset
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1046
                             '%d\n' # end offset
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1047
                             'key4\n'
1048
                             '\n'
1049
                             '%d\n'
1050
                             '%d\n'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1051
                             % (entry1[0], entry1[1],
1052
                                entry4[0], entry4[1]),
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
1053
                            header)
1054
        z_block = rest[z_header_len:]
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1055
        self.assertEqual(block_bytes, z_block)
1056
1057
    def test_from_bytes(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1058
        locations, block = self.make_block(self._texts)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1059
        manager = groupcompress._LazyGroupContentManager(block)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1060
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1062
        wire_bytes = manager._wire_bytes()
1063
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
3735.32.18 by John Arbash Meinel
We now support generating a network stream.
1064
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1065
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1066
        self.assertEqual(2, len(manager._factories))
3735.32.17 by John Arbash Meinel
We now round-trip the wire_bytes.
1067
        self.assertEqual(block._z_content, manager._block._z_content)
1068
        result_order = []
1069
        for record in manager.get_record_stream():
1070
            result_order.append(record.key)
1071
            text = self._texts[record.key]
1072
            self.assertEqual(text, record.get_bytes_as('fulltext'))
3735.38.2 by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3.
1073
        self.assertEqual([('key1',), ('key4',)], result_order)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1074
1075
    def test__check_rebuild_no_changes(self):
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1076
        block, manager = self.make_block_and_full_manager(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1077
        manager._check_rebuild_block()
1078
        self.assertIs(block, manager._block)
1079
1080
    def test__check_rebuild_only_one(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1081
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1082
        manager = groupcompress._LazyGroupContentManager(block)
1083
        # Request just the first key, which should trigger a 'strip' action
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1084
        self.add_key_to_manager(('key1',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1085
        manager._check_rebuild_block()
1086
        self.assertIsNot(block, manager._block)
1087
        self.assertTrue(block._content_length > manager._block._content_length)
1088
        # We should still be able to get the content out of this block, though
1089
        # it should only have 1 entry
1090
        for record in manager.get_record_stream():
1091
            self.assertEqual(('key1',), record.key)
1092
            self.assertEqual(self._texts[record.key],
1093
                             record.get_bytes_as('fulltext'))
1094
1095
    def test__check_rebuild_middle(self):
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1096
        locations, block = self.make_block(self._texts)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1097
        manager = groupcompress._LazyGroupContentManager(block)
1098
        # Requesting a small key in the middle should trigger a 'rebuild'
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
1099
        self.add_key_to_manager(('key4',), locations, block, manager)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
1100
        manager._check_rebuild_block()
1101
        self.assertIsNot(block, manager._block)
1102
        self.assertTrue(block._content_length > manager._block._content_length)
1103
        for record in manager.get_record_stream():
1104
            self.assertEqual(('key4',), record.key)
1105
            self.assertEqual(self._texts[record.key],
1106
                             record.get_bytes_as('fulltext'))
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1107
5755.2.5 by John Arbash Meinel
Expose the setting up the stack.
1108
    def test_manager_default_max_entries_per_source(self):
1109
        locations, old_block = self.make_block(self._texts)
1110
        manager = groupcompress._LazyGroupContentManager(old_block)
1111
        gcvf = groupcompress.GroupCompressVersionedFiles
1112
        # It doesn't greedily evaluate _max_entries_per_source
1113
        self.assertIs(None, manager._max_entries_per_source)
1114
        self.assertEqual(gcvf._DEFAULT_MAX_ENTRIES_PER_SOURCE,
1115
                         manager._get_max_entries_per_source())
1116
1117
    def test_manager_custom_max_entries_per_source(self):
1118
        locations, old_block = self.make_block(self._texts)
1119
        called = []
1120
        def max_entries():
1121
            called.append('called')
1122
            return 10
1123
        manager = groupcompress._LazyGroupContentManager(old_block,
1124
            get_max_entries_per_source=max_entries)
1125
        gcvf = groupcompress.GroupCompressVersionedFiles
1126
        # It doesn't greedily evaluate _max_entries_per_source
1127
        self.assertIs(None, manager._max_entries_per_source)
1128
        self.assertEqual(10, manager._get_max_entries_per_source())
1129
        self.assertEqual(10, manager._get_max_entries_per_source())
1130
        self.assertEqual(10, manager._max_entries_per_source)
1131
        # Only called 1 time
1132
        self.assertEqual(['called'], called)
1133
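The behaviour verified here is plain call-once memoisation of a callback; in
isolation the pattern looks like this (generic sketch, names invented):

    class LazyValue(object):
        """Compute a value on first use, then cache it."""

        def __init__(self, compute):
            self._compute = compute
            self._value = None

        def get(self):
            if self._value is None:
                self._value = self._compute()  # runs at most once
            return self._value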
1134
    def test__rebuild_handles_max_entries_per_source(self):
1135
        locations, old_block = self.make_block(self._texts)
1136
        manager = groupcompress._LazyGroupContentManager(old_block,
1137
            get_max_entries_per_source=lambda: 2)
1138
        gc = manager._make_group_compressor()
1139
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
1140
            self.assertEqual(2, gc._delta_index._max_entries_per_source)
1141
        self.add_key_to_manager(('key3',), locations, old_block, manager)
1142
        self.add_key_to_manager(('key4',), locations, old_block, manager)
1143
        action, last_byte, total_bytes = manager._check_rebuild_action()
1144
        self.assertEqual('rebuild', action)
1145
        manager._rebuild_block()
1146
        new_block = manager._block
1147
        self.assertIsNot(old_block, new_block)
1148
        # Because of the new max_entries_per_source, we do a poor job of
1149
        # rebuilding. This is a side-effect of the change, but at least it does
1150
        # show the setting had an effect.
1151
        self.assertTrue(old_block._content_length < new_block._content_length)
1152
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1153
    def test_check_is_well_utilized_all_keys(self):
1154
        block, manager = self.make_block_and_full_manager(self._texts)
1155
        self.assertFalse(manager.check_is_well_utilized())
1156
        # Though we can fake it by changing the recommended minimum size
1157
        manager._full_enough_block_size = block._content_length
1158
        self.assertTrue(manager.check_is_well_utilized())
1159
        # Setting it just above causes it to fail
1160
        manager._full_enough_block_size = block._content_length + 1
1161
        self.assertFalse(manager.check_is_well_utilized())
1162
        # Setting the mixed-block size doesn't do anything, because the content
1163
        # is considered to not be 'mixed'
1164
        manager._full_enough_mixed_block_size = block._content_length
1165
        self.assertFalse(manager.check_is_well_utilized())
1166
1167
    def test_check_is_well_utilized_mixed_keys(self):
1168
        texts = {}
1169
        f1k1 = ('f1', 'k1')
1170
        f1k2 = ('f1', 'k2')
1171
        f2k1 = ('f2', 'k1')
1172
        f2k2 = ('f2', 'k2')
1173
        texts[f1k1] = self._texts[('key1',)]
1174
        texts[f1k2] = self._texts[('key2',)]
1175
        texts[f2k1] = self._texts[('key3',)]
1176
        texts[f2k2] = self._texts[('key4',)]
1177
        block, manager = self.make_block_and_full_manager(texts)
1178
        self.assertFalse(manager.check_is_well_utilized())
1179
        manager._full_enough_block_size = block._content_length
1180
        self.assertTrue(manager.check_is_well_utilized())
1181
        manager._full_enough_block_size = block._content_length + 1
1182
        self.assertFalse(manager.check_is_well_utilized())
1183
        manager._full_enough_mixed_block_size = block._content_length
1184
        self.assertTrue(manager.check_is_well_utilized())
1185
1186
    def test_check_is_well_utilized_partial_use(self):
1187
        locations, block = self.make_block(self._texts)
1188
        manager = groupcompress._LazyGroupContentManager(block)
1189
        manager._full_enough_block_size = block._content_length
1190
        self.add_key_to_manager(('key1',), locations, block, manager)
1191
        self.add_key_to_manager(('key2',), locations, block, manager)
1192
        # Just using the content from key1 and 2 is not enough to be considered
1193
        # 'complete'
1194
        self.assertFalse(manager.check_is_well_utilized())
1195
        # However if we add key4, then we have enough, as we only require 75%
1196
        # consumption
1197
        self.add_key_to_manager(('key4',), locations, block, manager)
1198
        self.assertTrue(manager.check_is_well_utilized())
5365.4.1 by John Arbash Meinel
Find a case where we are wasting a bit of memory.
1199
1200
1201
class Test_GCBuildDetails(tests.TestCase):
1202
1203
    def test_acts_like_tuple(self):
1204
        # _GCBuildDetails inlines some of the data that used to be spread out
1205
        # across a bunch of tuples
1206
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1207
            ('INDEX', 10, 20, 0, 5))
1208
        self.assertEqual(4, len(bd))
1209
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
1210
        self.assertEqual(None, bd[1]) # Compression Parent is always None
1211
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
1212
        self.assertEqual(('group', None), bd[3]) # Record details
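        # Read positionally, bd is therefore equivalent to the old 4-tuple
        # (index_memo, compression_parent, parents, record_details).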
1213
1214
    def test__repr__(self):
1215
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
1216
            ('INDEX', 10, 20, 0, 5))
1217
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
1218
                         " (('parent1',), ('parent2',)))",
1219
                         repr(bd))
1220