# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    groupcompress,
    errors,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
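
# Each TestAllGroupCompressors test therefore runs once per scenario above,
# with the scenario name attached to the test id, so failures report which
# compressor implementation was being exercised.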


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
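
    # A minimal sketch of the record layout asserted above, inferred purely
    # from the 'f' '\x0f' bytes in expected_lines (an assumption, not the
    # real encoder): a fulltext record is 'f', a length, then the raw text.
    def _example_fulltext_record(self, text):
        # A single length byte only covers texts under 128 bytes; the real
        # code uses a variable-length integer here.
        assert len(text) < 128
        return 'f' + chr(len(text)) + text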

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the common line
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
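
    # A rough decoder for the delta bytes asserted above, inferred from the
    # comments in these tests (an assumption, not the real parser): after the
    # 'd' marker and the lengths, an opcode with the high bit set is a copy
    # (0x91 means one offset byte and one length byte follow), while an
    # opcode below 0x80 inserts that many literal bytes.
    def _example_decode_instruction(self, delta, pos):
        op = ord(delta[pos])
        pos += 1
        if op & 0x80:
            # Copy instruction; this sketch only handles the 0x91 shape used
            # in expected_lines (one offset byte, one length byte).
            offset = ord(delta[pos])
            length = ord(delta[pos + 1])
            return ('copy', offset, length), pos + 2
        # Insert of `op` literal bytes.
        return ('insert', delta[pos:pos + op]), pos + op
    # e.g. decoding '\x91\x0a\x2c' from position 0 gives ('copy', 10, 44),
    # matching the "copy, offset 0x0a, len 0x2c" comment above.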

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the common line
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Calling _ensure_content() twice is safe

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
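
    # A sketch of reading the header written above. The field order
    # ('gcb1z', compressed length, uncompressed length, payload) is taken
    # from the bytes asserted in test_from_bytes and test_to_bytes; the
    # helper itself is ours, not part of the real API.
    def _example_parse_block(self, data):
        header, z_len, c_len, payload = data.split('\n', 3)
        if header != 'gcb1z':
            raise ValueError('not a zlib group compress block')
        z_len, c_len = int(z_len), int(c_len)
        content = zlib.decompress(payload[:z_len])
        assert len(content) == c_len
        return content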

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a combination, so we combine a
        # sha hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
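
    # The partial reads above rely on zlib's incremental API. A minimal
    # standalone sketch of the same idea (ours, not the block's actual code
    # path): cap the output size so the decompressor stays resumable.
    def _example_partial_decompress(self, z_bytes, min_bytes):
        decompressor = zlib.decompressobj()
        # decompress(data, max_length) returns at most max_length bytes and
        # keeps the rest of the input in decompressor.unconsumed_tail.
        partial = decompressor.decompress(z_bytes, min_bytes)
        return partial, decompressor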

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
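
    # Note the pattern asserted above: the first record emitted from each
    # physical block ('a' and 'e') carries the full compressed block as
    # 'groupcompress-block', and every later record from the same block is a
    # lightweight 'groupcompress-block-ref' back to it, which is what lets
    # insert_record_stream reuse blocks wholesale.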

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                         [(r,) for r in 'abcdefgh'],
                                         'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s' # zheader
                         '%s' # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n' # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
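
    # The decompressed header checked above is a flat run of
    # "key\nparents\nstart\nend\n" records; a small sketch of walking it
    # (the helper is ours, based only on the assertEqualDiff above):
    def _example_iter_header_entries(self, header):
        lines = header.split('\n')
        for pos in xrange(0, len(lines) - 1, 4):
            key, parents, start, end = lines[pos:pos + 4]
            yield key, parents, int(start), int(end)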

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))