/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-10-17 04:43:14 UTC
  • mto: This revision was merged to the branch mainline in revision 4756.
  • Revision ID: john@arbash-meinel.com-20091017044314-nlvrrqnz0f2wzcp4
Change the GroupCompressBlock code a bit.
If the first decompress request is big enough, just decompress everything,
and when we do that, let go of the decompressobj.

After digging through the zlib code, it looks like a single zlib stream object
carries about 5kB of internal state plus four 64kB working buffers (roughly
260kB of state in total). That turns out to be quite a lot if you think about it.


In the case of branching a copy of 'bzr.dev' locally, peak memory turned out
to be 383MB with bzr.dev and 345MB with only this patch applied (a saving of
roughly 11% of peak).

Also, this was 'unreferenced' memory, because it is hidden inside zlib's
internal state and working buffers, so it wasn't memory that Meliae could
find. \o/
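To illustrate the idea, here is a minimal standalone sketch (this is not
bzrlib's actual GroupCompressBlock code; the class and method names below are
invented for this example). zlib's decompressobj() allows bounded
decompression through its max_length argument, and dropping the object as soon
as the content is fully inflated frees its internal buffers:

import zlib


class PartialDecompressor(object):
    """Sketch of lazy decompression with early release of the decompressobj."""

    def __init__(self, z_bytes, content_length):
        self._z_bytes = z_bytes              # remaining compressed input
        self._content_length = content_length
        self._content = b''
        self._decompressor = zlib.decompressobj()

    def ensure_content(self, num_bytes):
        """Make sure at least num_bytes of plain content are available."""
        if self._decompressor is None or len(self._content) >= num_bytes:
            return self._content
        if num_bytes >= self._content_length:
            # The request covers everything: decompress it all in one go and
            # release the decompressobj (roughly 260kB of state, per the
            # numbers above).
            self._content += self._decompressor.decompress(self._z_bytes)
            self._content += self._decompressor.flush()
            self._decompressor = None
            self._z_bytes = None
        else:
            # Only inflate as much as was asked for; keep the decompressor
            # (and the unconsumed input) around for a later request.
            needed = num_bytes - len(self._content)
            self._content += self._decompressor.decompress(self._z_bytes,
                                                            needed)
            self._z_bytes = self._decompressor.unconsumed_tail
        return self._content

The tests test_partial_decomp and test__ensure_all_content below exercise the
real code path (GroupCompressBlock._ensure_content) and check that
_z_content_decompressor is set back to None once the content has been fully
decompressed.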

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())