/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/tests/test_groupcompress.py

  • Committer: Jelmer Vernooij
  • Date: 2017-06-10 16:40:42 UTC
  • mfrom: (6653.6.7 rename-controldir)
  • mto: This revision was merged to the branch mainline in revision 6690.
  • Revision ID: jelmer@jelmer.uk-20170610164042-zrxqgy2htyduvke2
Merge rename-controldir branch.

 
# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from .. import (
    config,
    errors,
    osutils,
    tests,
    trace,
    )
from ..bzr import (
    btree_index,
    groupcompress,
    index as _mod_index,
    versionedfile,
    )
from ..osutils import sha_string
from .test__groupcompress import compiled_groupcompress_feature
from .scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
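
# load_tests_apply_scenarios runs every test in a class that defines a
# 'scenarios' attribute once per scenario, injecting that scenario's
# attributes into the test instance. A rough sketch of the effect (test ids
# are illustrative, assuming the compiled extension is available):
#
#     TestAllGroupCompressors.test_empty_delta(python)  # PythonGroupCompressor
#     TestAllGroupCompressors.test_empty_delta(C)       # PyrexGroupCompressor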
 

class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
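
    # The copy instruction '\x91\x0a\x2c' checked above can be decoded by
    # hand: the high bit (0x80) of the command byte marks a copy, bits 0-3
    # flag which offset bytes follow, and bits 4-6 which length bytes follow.
    # A minimal decoder for this case (an illustrative sketch, not the
    # implementation in breezy.bzr.groupcompress):
    #
    #     def decode_copy(cmd, data, pos):
    #         offset = length = 0
    #         for i in range(4):      # offset bytes, flagged by bits 0-3
    #             if cmd & (1 << i):
    #                 offset |= ord(data[pos]) << (i * 8)
    #                 pos += 1
    #         for i in range(3):      # length bytes, flagged by bits 4-6
    #             if cmd & (1 << (4 + i)):
    #                 length |= ord(data[pos]) << (i * 8)
    #                 pos += 1
    #         return offset, length, pos
    #
    #     # decode_copy(0x91, '\x0a\x2c', 0) == (0x0a, 0x2c, 2)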
 
    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
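
    # For reference, the serialised block is just three newline-terminated
    # header fields followed by the zlib payload. A hand parse (a sketch of
    # the format exercised above, not an API these tests rely on):
    #
    #     kind, z_len, c_len, payload = z_bytes.split('\n', 3)
    #     # kind == 'gcb1z'; payload[:int(z_len)] is zlib data that
    #     # decompresses to int(c_len) bytes of content.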
 
    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
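
    # _ensure_content builds on zlib's incremental API. The underlying idea,
    # as a standalone sketch (not the code under test):
    #
    #     d = zlib.decompressobj()
    #     partial = d.decompress(z_content, 100)  # no more than 100 bytes out
    #     rest = d.decompress(d.unconsumed_tail) + d.flush()
    #     assert partial + rest == content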
 
    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in range(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([('a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)
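
    # To make the index_memo/read_memo split concrete (values here are the
    # fakes used by these tests, not production data):
    #
    #     index_memo = ('fake index', 100, 50, None, None)
    #     read_memo = index_memo[0:3]   # ('fake index', 100, 50)
    #
    # The read_memo is just the (index, offset, size) prefix identifying the
    # raw bytes to fetch, independent of how factories later slice them up.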
 
    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)
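
    # The wire format is therefore four newline-terminated header fields
    # followed by two opaque byte runs. Parsing it back out (a sketch
    # mirroring what a reader must do, not the reader itself):
    #
    #     kind, z_header_len, header_len, block_len, rest = \
    #         wire_bytes.split('\n', 4)
    #     z_header = rest[:int(z_header_len)]  # zlib-compressed key table
    #     z_block = rest[int(z_header_len):]   # the block itself ('gcb1z...')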
 
1024
 
 
1025
    def test__wire_bytes(self):
 
1026
        locations, block = self.make_block(self._texts)
 
1027
        manager = groupcompress._LazyGroupContentManager(block)
 
1028
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1029
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1030
        block_bytes = block.to_bytes()
 
1031
        wire_bytes = manager._wire_bytes()
 
1032
        (storage_kind, z_header_len, header_len,
 
1033
         block_len, rest) = wire_bytes.split('\n', 4)
 
1034
        z_header_len = int(z_header_len)
 
1035
        header_len = int(header_len)
 
1036
        block_len = int(block_len)
 
1037
        self.assertEqual('groupcompress-block', storage_kind)
 
1038
        self.assertEqual(34, z_header_len)
 
1039
        self.assertEqual(26, header_len)
 
1040
        self.assertEqual(len(block_bytes), block_len)
 
1041
        z_header = rest[:z_header_len]
 
1042
        header = zlib.decompress(z_header)
 
1043
        self.assertEqual(header_len, len(header))
 
1044
        entry1 = locations[('key1',)]
 
1045
        entry4 = locations[('key4',)]
 
1046
        self.assertEqualDiff('key1\n'
 
1047
                             '\n'  # no parents
 
1048
                             '%d\n' # start offset
 
1049
                             '%d\n' # end offset
 
1050
                             'key4\n'
 
1051
                             '\n'
 
1052
                             '%d\n'
 
1053
                             '%d\n'
 
1054
                             % (entry1[0], entry1[1],
 
1055
                                entry4[0], entry4[1]),
 
1056
                            header)
 
1057
        z_block = rest[z_header_len:]
 
1058
        self.assertEqual(block_bytes, z_block)
 
1059
 
 
1060
    def test_from_bytes(self):
 
1061
        locations, block = self.make_block(self._texts)
 
1062
        manager = groupcompress._LazyGroupContentManager(block)
 
1063
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1064
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1065
        wire_bytes = manager._wire_bytes()
 
1066
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
 
1067
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
 
1068
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
 
1069
        self.assertEqual(2, len(manager._factories))
 
1070
        self.assertEqual(block._z_content, manager._block._z_content)
 
1071
        result_order = []
 
1072
        for record in manager.get_record_stream():
 
1073
            result_order.append(record.key)
 
1074
            text = self._texts[record.key]
 
1075
            self.assertEqual(text, record.get_bytes_as('fulltext'))
 
1076
        self.assertEqual([('key1',), ('key4',)], result_order)
 
1077
 
 
1078
    def test__check_rebuild_no_changes(self):
 
1079
        block, manager = self.make_block_and_full_manager(self._texts)
 
1080
        manager._check_rebuild_block()
 
1081
        self.assertIs(block, manager._block)
 
1082
 
 
    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting just the first key should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block,
        # though it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
 
 
    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
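
    # --- Editor's sketch (illustrative; inferred from the two tests above,
    # not copied from the implementation) ---
    # _check_rebuild_block appears to pick one of three outcomes: keep the
    # block, 'strip' the unused tail when the wanted content sits at the
    # front, or 'rebuild' when only a small slice further in is wanted. One
    # plausible decision rule consistent with both tests:

    def _illustrative_rebuild_action(self, total_bytes_used, last_byte_used,
                                     block_length):
        """Illustrative only; the real logic lives in groupcompress."""
        if total_bytes_used * 2 >= block_length:
            return None        # most of the block is wanted; keep it
        if total_bytes_used * 2 > last_byte_used:
            return 'strip'     # wanted bytes cluster at the front; drop tail
        return 'rebuild'       # sparse slice deeper in; recompress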
 
 
    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't eagerly evaluate the compressor settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())
 
 
    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        # It doesn't eagerly evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # The callback is only invoked once
        self.assertEqual(['called'], called)
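
    # --- Editor's sketch (illustrative; not the real implementation) ---
    # The two tests above pin down a lazy-memoization contract: settings are
    # not computed at construction time, and the callback runs at most once.
    # A minimal object satisfying both expectations (names hypothetical):

    class _IllustrativeLazySettings(object):
        """Caches the result of a settings callback on first use."""

        def __init__(self, get_settings):
            self._get_settings = get_settings  # stored, not yet invoked
            self._settings = None              # mirrors assertIs(None, ...)

        def settings(self):
            if self._settings is None:
                # Invoked at most once, as the 'called' list asserts.
                self._settings = self._get_settings()
            return self._settings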
 
 
    def test__rebuild_handles_compressor_settings(self):
        if (groupcompress.GroupCompressor
                is not groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it
        # shows that the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)
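
    # --- Editor's note (assumption, based on the assertions above) ---
    # 'max_bytes_to_index' caps how much existing content the delta index
    # scans for match candidates; indexing only 32 bytes all but disables
    # cross-text matching, which is why the rebuilt block comes out larger.
    # The knob is threaded through exactly as the test shows:
    #
    #   groupcompress._LazyGroupContentManager(
    #       block, get_compressor_settings=lambda: {'max_bytes_to_index': 32})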
 
 
    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by lowering the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above the content length makes it fail again
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size does nothing, because this content
        # is not considered 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())
 
 
    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
 
 
    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and key2 is not enough to be
        # considered 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However, if we add key4 as well, we have enough, as we only
        # require 75% consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
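
    # --- Editor's sketch (illustrative; thresholds inferred from the three
    # tests above, not copied from the implementation) ---
    # check_is_well_utilized() appears to demand (a) that roughly 75% of the
    # block's content actually be requested and (b) that the block reach a
    # 'full enough' size, with a separate acceptance path for blocks whose
    # keys span multiple prefixes ('mixed' content):

    def _illustrative_is_well_utilized(self, block_size, bytes_used,
                                       is_mixed, full_enough,
                                       full_enough_mixed):
        """Illustrative only: one reading consistent with the tests above."""
        if bytes_used * 4 < block_size * 3:
            return False  # less than ~75% of the content was requested
        if is_mixed and block_size >= full_enough_mixed:
            return True   # mixed-prefix blocks get their own size bar
        return block_size >= full_enough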
 
 
class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1])  # Compression parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3])  # Record details
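
    # --- Editor's sketch (illustrative; not the real class) ---
    # The test above pins down the tuple protocol _GCBuildDetails honours:
    # a fixed length of 4 and positional access in a set order. A minimal
    # stand-in that would satisfy those assertions (name hypothetical):

    class _IllustrativeBuildDetails(object):
        """Tuple-like view over parents plus index details."""

        __slots__ = ('_parents', '_index_details')

        def __init__(self, parents, index_details):
            self._parents = parents
            self._index_details = index_details

        def __len__(self):
            return 4

        def __getitem__(self, offset):
            # Order: index details, compression parent, parents, record
            # details -- matching bd[0] through bd[3] in the test above.
            return (self._index_details, None, self._parents,
                    ('group', None))[offset]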
 
 
    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))