/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Date: 2010-05-06 23:41:35 UTC
  • mto: This revision was merged to the branch mainline in revision 5223.
  • Revision ID: robertc@robertcollins.net-20100506234135-yivbzczw1sejxnxc
Lock methods on ``Tree``, ``Branch`` and ``Repository`` are now
expected to return an object which can be used to unlock them. This reduces
duplicate code when using cleanups. The tokens previously returned by
``Branch.lock_write`` and ``Repository.lock_write`` are now attributes
on the result of ``lock_write``. ``repository.RepositoryWriteLockResult``
and ``branch.BranchWriteLockResult`` document this. (Robert Collins)
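
A minimal sketch of the new calling convention (assuming the ``unlock`` and
``token`` attributes implied by the result classes named above; not verified
against this revision):

    result = branch.lock_write()
    try:
        token = result.token    # the value lock_write() used to return
        do_work(branch)         # hypothetical work while the branch is locked
    finally:
        result.unlock()         # the returned object can unlock the branch

    # In tests, the same object removes the separate unlock step:
    self.addCleanup(branch.lock_write().unlock)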

``log._get_info_for_log_files`` now takes an ``add_cleanup`` callable.
(Robert Collins)
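
An illustrative sketch only; the position of the new argument and the
surrounding variable names are assumptions, not taken from this revision:

    # ``revision`` and ``file_list`` are whatever the caller already has;
    # ``self.add_cleanup`` is any callable that registers a cleanup
    # (for example a Command's add_cleanup or a TestCase's addCleanup).
    info = log._get_info_for_log_files(revision, file_list, self.add_cleanup)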

1
 
# Copyright (C) 2008-2011 Canonical Ltd
 
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
18
18
 
19
19
import zlib
20
20
 
21
 
from .. import (
22
 
    config,
 
21
from bzrlib import (
 
22
    btree_index,
 
23
    groupcompress,
23
24
    errors,
 
25
    index as _mod_index,
24
26
    osutils,
25
27
    tests,
26
28
    trace,
27
 
    )
28
 
from ..bzr import (
29
 
    btree_index,
30
 
    groupcompress,
31
 
    knit,
32
 
    index as _mod_index,
33
29
    versionedfile,
34
30
    )
35
 
from ..osutils import sha_string
36
 
from .test__groupcompress import compiled_groupcompress_feature
37
 
from .scenarios import load_tests_apply_scenarios
38
 
 
39
 
 
40
 
def group_compress_implementation_scenarios():
 
31
from bzrlib.osutils import sha_string
 
32
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
 
33
 
 
34
 
 
35
def load_tests(standard_tests, module, loader):
 
36
    """Parameterize tests for all versions of groupcompress."""
 
37
    to_adapt, result = tests.split_suite_by_condition(
 
38
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
41
39
    scenarios = [
42
40
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
43
41
        ]
44
42
    if compiled_groupcompress_feature.available():
45
43
        scenarios.append(('C',
46
 
                          {'compressor': groupcompress.PyrexGroupCompressor}))
47
 
    return scenarios
48
 
 
49
 
 
50
 
load_tests = load_tests_apply_scenarios
 
44
            {'compressor': groupcompress.PyrexGroupCompressor}))
 
45
    return tests.multiply_tests(to_adapt, scenarios, result)
51
46
 
52
47
 
53
48
class TestGroupCompressor(tests.TestCase):
54
49
 
55
50
    def _chunks_to_repr_lines(self, chunks):
56
 
        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))
 
51
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
57
52
 
58
53
    def assertEqualDiffEncoded(self, expected, actual):
59
54
        """Compare the actual content to the expected content.
71
66
class TestAllGroupCompressors(TestGroupCompressor):
72
67
    """Tests for GroupCompressor"""
73
68
 
74
 
    scenarios = group_compress_implementation_scenarios()
75
 
    compressor = None  # Set by scenario
 
69
    compressor = None # Set by multiply_tests
76
70
 
77
71
    def test_empty_delta(self):
78
72
        compressor = self.compressor()
81
75
    def test_one_nosha_delta(self):
82
76
        # diff against NUKK
83
77
        compressor = self.compressor()
84
 
        text = b'strange\ncommon\n'
85
 
        sha1, start_point, end_point, _ = compressor.compress(
86
 
            ('label',), [text], len(text), None)
87
 
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
88
 
        expected_lines = b'f\x0fstrange\ncommon\n'
89
 
        self.assertEqual(expected_lines, b''.join(compressor.chunks))
 
78
        sha1, start_point, end_point, _ = compressor.compress(('label',),
 
79
            'strange\ncommon\n', None)
 
80
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
 
81
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
 
82
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
90
83
        self.assertEqual(0, start_point)
91
 
        self.assertEqual(len(expected_lines), end_point)
 
84
        self.assertEqual(sum(map(len, expected_lines)), end_point)
92
85
 
93
86
    def test_empty_content(self):
94
87
        compressor = self.compressor()
95
88
        # Adding empty bytes should return the 'null' record
96
 
        sha1, start_point, end_point, kind = compressor.compress(
97
 
            ('empty',), [], 0, None)
 
89
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
 
90
                                                                 '', None)
98
91
        self.assertEqual(0, start_point)
99
92
        self.assertEqual(0, end_point)
100
93
        self.assertEqual('fulltext', kind)
102
95
        self.assertEqual(0, compressor.endpoint)
103
96
        self.assertEqual([], compressor.chunks)
104
97
        # Even after adding some content
105
 
        text = b'some\nbytes\n'
106
 
        compressor.compress(('content',), [text], len(text), None)
 
98
        compressor.compress(('content',), 'some\nbytes\n', None)
107
99
        self.assertTrue(compressor.endpoint > 0)
108
 
        sha1, start_point, end_point, kind = compressor.compress(
109
 
            ('empty2',), [], 0, None)
 
100
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
 
101
                                                                 '', None)
110
102
        self.assertEqual(0, start_point)
111
103
        self.assertEqual(0, end_point)
112
104
        self.assertEqual('fulltext', kind)
116
108
        # Knit fetching will try to reconstruct texts locally which results in
117
109
        # reading something that is in the compressor stream already.
118
110
        compressor = self.compressor()
119
 
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
120
 
        sha1_1, _, _, _ = compressor.compress(
121
 
            ('label',), [text], len(text), None)
 
111
        sha1_1, _, _, _ = compressor.compress(('label',),
 
112
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
122
113
        expected_lines = list(compressor.chunks)
123
 
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
124
 
        sha1_2, _, end_point, _ = compressor.compress(
125
 
            ('newlabel',), [text], len(text), None)
 
114
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
 
115
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
126
116
        # get the first out
127
 
        self.assertEqual(([b'strange\ncommon long line\n'
128
 
                           b'that needs a 16 byte match\n'], sha1_1),
 
117
        self.assertEqual(('strange\ncommon long line\n'
 
118
                          'that needs a 16 byte match\n', sha1_1),
129
119
                         compressor.extract(('label',)))
130
120
        # and the second
131
 
        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
132
 
                           b'different\n'], sha1_2),
 
121
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
 
122
                          'different\n', sha1_2),
133
123
                         compressor.extract(('newlabel',)))
134
124
 
135
125
    def test_pop_last(self):
136
126
        compressor = self.compressor()
137
 
        text = b'some text\nfor the first entry\n'
138
 
        _, _, _, _ = compressor.compress(
139
 
            ('key1',), [text], len(text), None)
 
127
        _, _, _, _ = compressor.compress(('key1',),
 
128
            'some text\nfor the first entry\n', None)
140
129
        expected_lines = list(compressor.chunks)
141
 
        text = b'some text\nfor the second entry\n'
142
 
        _, _, _, _ = compressor.compress(
143
 
            ('key2',), [text], len(text), None)
 
130
        _, _, _, _ = compressor.compress(('key2',),
 
131
            'some text\nfor the second entry\n', None)
144
132
        compressor.pop_last()
145
133
        self.assertEqual(expected_lines, compressor.chunks)
146
134
 
152
140
 
153
141
    def test_stats(self):
154
142
        compressor = self.compressor()
155
 
        chunks = [b'strange\n',
156
 
                  b'common very very long line\n',
157
 
                  b'plus more text\n']
158
 
        compressor.compress(
159
 
            ('label',), chunks, sum(map(len, chunks)), None)
160
 
        chunks = [
161
 
            b'common very very long line\n',
162
 
            b'plus more text\n',
163
 
            b'different\n',
164
 
            b'moredifferent\n']
165
 
        compressor.compress(
166
 
            ('newlabel',),
167
 
            chunks, sum(map(len, chunks)), None)
168
 
        chunks = [
169
 
            b'new\n',
170
 
            b'common very very long line\n',
171
 
            b'plus more text\n',
172
 
            b'different\n',
173
 
            b'moredifferent\n']
174
 
        compressor.compress(
175
 
            ('label3',), chunks, sum(map(len, chunks)), None)
 
143
        compressor.compress(('label',),
 
144
                            'strange\n'
 
145
                            'common very very long line\n'
 
146
                            'plus more text\n', None)
 
147
        compressor.compress(('newlabel',),
 
148
                            'common very very long line\n'
 
149
                            'plus more text\n'
 
150
                            'different\n'
 
151
                            'moredifferent\n', None)
 
152
        compressor.compress(('label3',),
 
153
                            'new\n'
 
154
                            'common very very long line\n'
 
155
                            'plus more text\n'
 
156
                            'different\n'
 
157
                            'moredifferent\n', None)
176
158
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
177
159
 
178
160
    def test_two_nosha_delta(self):
179
161
        compressor = self.compressor()
180
 
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
181
 
        sha1_1, _, _, _ = compressor.compress(('label',), [text], len(text), None)
 
162
        sha1_1, _, _, _ = compressor.compress(('label',),
 
163
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
182
164
        expected_lines = list(compressor.chunks)
183
 
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
184
 
        sha1_2, start_point, end_point, _ = compressor.compress(
185
 
            ('newlabel',), [text], len(text), None)
186
 
        self.assertEqual(sha_string(text), sha1_2)
 
165
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
 
166
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
167
        self.assertEqual(sha_string('common long line\n'
 
168
                                    'that needs a 16 byte match\n'
 
169
                                    'different\n'), sha1_2)
187
170
        expected_lines.extend([
188
171
            # 'delta', delta length
189
 
            b'd\x0f',
 
172
            'd\x0f',
190
173
            # source and target length
191
 
            b'\x36',
 
174
            '\x36',
192
175
            # copy the line common
193
 
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
 
176
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
194
177
            # add the line different, and the trailing newline
195
 
            b'\x0adifferent\n',  # insert 10 bytes
 
178
            '\x0adifferent\n', # insert 10 bytes
196
179
            ])
197
180
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
198
181
        self.assertEqual(sum(map(len, expected_lines)), end_point)
201
184
        # The first interesting test: make a change that should use lines from
202
185
        # both parents.
203
186
        compressor = self.compressor()
204
 
        text = b'strange\ncommon very very long line\nwith some extra text\n'
205
 
        sha1_1, _, _, _ = compressor.compress(
206
 
            ('label',), [text], len(text), None)
207
 
        text = b'different\nmoredifferent\nand then some more\n'
208
 
        sha1_2, _, _, _ = compressor.compress(
209
 
            ('newlabel',), [text], len(text), None)
 
187
        sha1_1, _, _, _ = compressor.compress(('label',),
 
188
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
189
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
 
190
            'different\nmoredifferent\nand then some more\n', None)
210
191
        expected_lines = list(compressor.chunks)
211
 
        text = (b'new\ncommon very very long line\nwith some extra text\n'
212
 
                b'different\nmoredifferent\nand then some more\n')
213
 
        sha1_3, start_point, end_point, _ = compressor.compress(
214
 
            ('label3',), [text], len(text), None)
215
 
        self.assertEqual(sha_string(text), sha1_3)
 
192
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
 
193
            'new\ncommon very very long line\nwith some extra text\n'
 
194
            'different\nmoredifferent\nand then some more\n',
 
195
            None)
 
196
        self.assertEqual(
 
197
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
198
                       'different\nmoredifferent\nand then some more\n'),
 
199
            sha1_3)
216
200
        expected_lines.extend([
217
201
            # 'delta', delta length
218
 
            b'd\x0b',
 
202
            'd\x0b',
219
203
            # source and target length
220
 
            b'\x5f'
 
204
            '\x5f'
221
205
            # insert new
222
 
            b'\x03new',
 
206
            '\x03new',
223
207
            # Copy of first parent 'common' range
224
 
            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
 
208
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
225
209
            # Copy of second parent 'different' range
226
 
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
 
210
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
227
211
            ])
228
212
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
229
213
        self.assertEqual(sum(map(len, expected_lines)), end_point)
235
219
 
236
220
    def test_stats(self):
237
221
        compressor = self.compressor()
238
 
        chunks = [b'strange\n',
239
 
                  b'common very very long line\n',
240
 
                  b'plus more text\n']
241
 
        compressor.compress(
242
 
            ('label',), chunks, sum(map(len, chunks)), None)
243
 
        chunks = [
244
 
            b'common very very long line\n',
245
 
            b'plus more text\n',
246
 
            b'different\n',
247
 
            b'moredifferent\n']
248
 
        compressor.compress(
249
 
            ('newlabel',), chunks, sum(map(len, chunks)), None)
250
 
        chunks = [
251
 
            b'new\n',
252
 
            b'common very very long line\n',
253
 
            b'plus more text\n',
254
 
            b'different\n',
255
 
            b'moredifferent\n']
256
 
        compressor.compress(
257
 
            ('label3',),
258
 
            chunks, sum(map(len, chunks)), None)
 
222
        compressor.compress(('label',),
 
223
                            'strange\n'
 
224
                            'common very very long line\n'
 
225
                            'plus more text\n', None)
 
226
        compressor.compress(('newlabel',),
 
227
                            'common very very long line\n'
 
228
                            'plus more text\n'
 
229
                            'different\n'
 
230
                            'moredifferent\n', None)
 
231
        compressor.compress(('label3',),
 
232
                            'new\n'
 
233
                            'common very very long line\n'
 
234
                            'plus more text\n'
 
235
                            'different\n'
 
236
                            'moredifferent\n', None)
259
237
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
260
238
 
261
239
    def test_two_nosha_delta(self):
262
240
        compressor = self.compressor()
263
 
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
264
 
        sha1_1, _, _, _ = compressor.compress(
265
 
            ('label',), [text], len(text), None)
 
241
        sha1_1, _, _, _ = compressor.compress(('label',),
 
242
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
266
243
        expected_lines = list(compressor.chunks)
267
 
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
268
 
        sha1_2, start_point, end_point, _ = compressor.compress(
269
 
            ('newlabel',), [text], len(text), None)
270
 
        self.assertEqual(sha_string(text), sha1_2)
 
244
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
 
245
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
246
        self.assertEqual(sha_string('common long line\n'
 
247
                                    'that needs a 16 byte match\n'
 
248
                                    'different\n'), sha1_2)
271
249
        expected_lines.extend([
272
250
            # 'delta', delta length
273
 
            b'd\x0f',
 
251
            'd\x0f',
274
252
            # target length
275
 
            b'\x36',
 
253
            '\x36',
276
254
            # copy the line common
277
 
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
 
255
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
278
256
            # add the line different, and the trailing newline
279
 
            b'\x0adifferent\n',  # insert 10 bytes
 
257
            '\x0adifferent\n', # insert 10 bytes
280
258
            ])
281
259
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
282
260
        self.assertEqual(sum(map(len, expected_lines)), end_point)
285
263
        # The first interesting test: make a change that should use lines from
286
264
        # both parents.
287
265
        compressor = self.compressor()
288
 
        text = b'strange\ncommon very very long line\nwith some extra text\n'
289
 
        sha1_1, _, _, _ = compressor.compress(
290
 
            ('label',), [text], len(text), None)
291
 
        text = b'different\nmoredifferent\nand then some more\n'
292
 
        sha1_2, _, _, _ = compressor.compress(
293
 
            ('newlabel',), [text], len(text), None)
 
266
        sha1_1, _, _, _ = compressor.compress(('label',),
 
267
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
268
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
 
269
            'different\nmoredifferent\nand then some more\n', None)
294
270
        expected_lines = list(compressor.chunks)
295
 
        text = (b'new\ncommon very very long line\nwith some extra text\n'
296
 
                b'different\nmoredifferent\nand then some more\n')
297
 
        sha1_3, start_point, end_point, _ = compressor.compress(
298
 
            ('label3',), [text], len(text), None)
299
 
        self.assertEqual(sha_string(text), sha1_3)
 
271
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
 
272
            'new\ncommon very very long line\nwith some extra text\n'
 
273
            'different\nmoredifferent\nand then some more\n',
 
274
            None)
 
275
        self.assertEqual(
 
276
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
277
                       'different\nmoredifferent\nand then some more\n'),
 
278
            sha1_3)
300
279
        expected_lines.extend([
301
280
            # 'delta', delta length
302
 
            b'd\x0c',
 
281
            'd\x0c',
303
282
            # target length
304
 
            b'\x5f'
 
283
            '\x5f'
305
284
            # insert new
306
 
            b'\x04new\n',
 
285
            '\x04new\n',
307
286
            # Copy of first parent 'common' range
308
 
            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
 
287
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
309
288
            # Copy of second parent 'different' range
310
 
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
 
289
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
311
290
            ])
312
291
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
313
292
        self.assertEqual(sum(map(len, expected_lines)), end_point)
320
299
        compressor = groupcompress.GroupCompressor()
321
300
        start = 0
322
301
        for key in sorted(key_to_text):
323
 
            compressor.compress(
324
 
                key, [key_to_text[key]], len(key_to_text[key]), None)
 
302
            compressor.compress(key, key_to_text[key], None)
325
303
        locs = dict((key, (start, end)) for key, (start, _, end, _)
326
 
                    in compressor.labels_deltas.items())
 
304
                    in compressor.labels_deltas.iteritems())
327
305
        block = compressor.flush()
328
306
        raw_bytes = block.to_bytes()
329
307
        # Go through from_bytes(to_bytes()) so that we start with a compressed
332
310
 
333
311
    def test_from_empty_bytes(self):
334
312
        self.assertRaises(ValueError,
335
 
                          groupcompress.GroupCompressBlock.from_bytes, b'')
 
313
                          groupcompress.GroupCompressBlock.from_bytes, '')
336
314
 
337
315
    def test_from_minimal_bytes(self):
338
316
        block = groupcompress.GroupCompressBlock.from_bytes(
339
 
            b'gcb1z\n0\n0\n')
 
317
            'gcb1z\n0\n0\n')
340
318
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
341
319
        self.assertIs(None, block._content)
342
 
        self.assertEqual(b'', block._z_content)
 
320
        self.assertEqual('', block._z_content)
343
321
        block._ensure_content()
344
 
        self.assertEqual(b'', block._content)
345
 
        self.assertEqual(b'', block._z_content)
346
 
        block._ensure_content()  # Ensure content is safe to call 2x
 
322
        self.assertEqual('', block._content)
 
323
        self.assertEqual('', block._z_content)
 
324
        block._ensure_content() # Ensure content is safe to call 2x
347
325
 
348
326
    def test_from_invalid(self):
349
327
        self.assertRaises(ValueError,
350
328
                          groupcompress.GroupCompressBlock.from_bytes,
351
 
                          b'this is not a valid header')
 
329
                          'this is not a valid header')
352
330
 
353
331
    def test_from_bytes(self):
354
 
        content = (b'a tiny bit of content\n')
 
332
        content = ('a tiny bit of content\n')
355
333
        z_content = zlib.compress(content)
356
334
        z_bytes = (
357
 
            b'gcb1z\n'  # group compress block v1 plain
358
 
            b'%d\n'  # Length of compressed content
359
 
            b'%d\n'  # Length of uncompressed content
360
 
            b'%s'   # Compressed content
 
335
            'gcb1z\n' # group compress block v1 plain
 
336
            '%d\n' # Length of compressed content
 
337
            '%d\n' # Length of uncompressed content
 
338
            '%s'   # Compressed content
361
339
            ) % (len(z_content), len(content), z_content)
362
340
        block = groupcompress.GroupCompressBlock.from_bytes(
363
341
            z_bytes)
369
347
        self.assertEqual(z_content, block._z_content)
370
348
        self.assertEqual(content, block._content)
371
349
 
372
 
    def test_to_chunks(self):
373
 
        content_chunks = [b'this is some content\n',
374
 
                          b'this content will be compressed\n']
375
 
        content_len = sum(map(len, content_chunks))
376
 
        content = b''.join(content_chunks)
377
 
        gcb = groupcompress.GroupCompressBlock()
378
 
        gcb.set_chunked_content(content_chunks, content_len)
379
 
        total_len, block_chunks = gcb.to_chunks()
380
 
        block_bytes = b''.join(block_chunks)
381
 
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
382
 
        self.assertEqual(total_len, len(block_bytes))
383
 
        self.assertEqual(gcb._content_length, content_len)
384
 
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
385
 
                           b'%d\n'  # Length of compressed content
386
 
                           b'%d\n'  # Length of uncompressed content
387
 
                           ) % (gcb._z_content_length, gcb._content_length)
388
 
        # The first chunk should be the header chunk. It is small, fixed size,
389
 
        # and there is no compelling reason to split it up
390
 
        self.assertEqual(expected_header, block_chunks[0])
391
 
        self.assertStartsWith(block_bytes, expected_header)
392
 
        remaining_bytes = block_bytes[len(expected_header):]
393
 
        raw_bytes = zlib.decompress(remaining_bytes)
394
 
        self.assertEqual(content, raw_bytes)
395
 
 
396
350
    def test_to_bytes(self):
397
 
        content = (b'this is some content\n'
398
 
                   b'this content will be compressed\n')
 
351
        content = ('this is some content\n'
 
352
                   'this content will be compressed\n')
399
353
        gcb = groupcompress.GroupCompressBlock()
400
354
        gcb.set_content(content)
401
 
        data = gcb.to_bytes()
 
355
        bytes = gcb.to_bytes()
402
356
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
403
357
        self.assertEqual(gcb._content_length, len(content))
404
 
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
405
 
                           b'%d\n'  # Length of compressed content
406
 
                           b'%d\n'  # Length of uncompressed content
407
 
                           ) % (gcb._z_content_length, gcb._content_length)
408
 
        self.assertStartsWith(data, expected_header)
409
 
        remaining_bytes = data[len(expected_header):]
 
358
        expected_header =('gcb1z\n' # group compress block v1 zlib
 
359
                          '%d\n' # Length of compressed content
 
360
                          '%d\n' # Length of uncompressed content
 
361
                         ) % (gcb._z_content_length, gcb._content_length)
 
362
        self.assertStartsWith(bytes, expected_header)
 
363
        remaining_bytes = bytes[len(expected_header):]
410
364
        raw_bytes = zlib.decompress(remaining_bytes)
411
365
        self.assertEqual(content, raw_bytes)
412
366
 
413
367
        # we should get the same results if using the chunked version
414
368
        gcb = groupcompress.GroupCompressBlock()
415
 
        gcb.set_chunked_content([b'this is some content\n'
416
 
                                 b'this content will be compressed\n'],
417
 
                                len(content))
418
 
        old_data = data
419
 
        data = gcb.to_bytes()
420
 
        self.assertEqual(old_data, data)
 
369
        gcb.set_chunked_content(['this is some content\n'
 
370
                                 'this content will be compressed\n'],
 
371
                                 len(content))
 
372
        old_bytes = bytes
 
373
        bytes = gcb.to_bytes()
 
374
        self.assertEqual(old_bytes, bytes)
421
375
 
422
376
    def test_partial_decomp(self):
423
377
        content_chunks = []
425
379
        # partial decompression to work with. Most auto-generated data
426
380
        # compresses a bit too well, we want a combination, so we combine a sha
427
381
        # hash with compressible data.
428
 
        for i in range(2048):
429
 
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
 
382
        for i in xrange(2048):
 
383
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
430
384
            content_chunks.append(next_content)
431
385
            next_sha1 = osutils.sha_string(next_content)
432
 
            content_chunks.append(next_sha1 + b'\n')
433
 
        content = b''.join(content_chunks)
 
386
            content_chunks.append(next_sha1 + '\n')
 
387
        content = ''.join(content_chunks)
434
388
        self.assertEqual(158634, len(content))
435
389
        z_content = zlib.compress(content)
436
390
        self.assertEqual(57182, len(z_content))
437
391
        block = groupcompress.GroupCompressBlock()
438
 
        block._z_content_chunks = (z_content,)
 
392
        block._z_content = z_content
439
393
        block._z_content_length = len(z_content)
440
394
        block._compressor_name = 'zlib'
441
395
        block._content_length = 158634
470
424
        # partial decompression to work with. Most auto-generated data
471
425
        # compresses a bit too well, we want a combination, so we combine a sha
472
426
        # hash with compressible data.
473
 
        for i in range(2048):
474
 
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
 
427
        for i in xrange(2048):
 
428
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
475
429
            content_chunks.append(next_content)
476
430
            next_sha1 = osutils.sha_string(next_content)
477
 
            content_chunks.append(next_sha1 + b'\n')
478
 
        content = b''.join(content_chunks)
 
431
            content_chunks.append(next_sha1 + '\n')
 
432
        content = ''.join(content_chunks)
479
433
        self.assertEqual(158634, len(content))
480
434
        z_content = zlib.compress(content)
481
435
        self.assertEqual(57182, len(z_content))
482
436
        block = groupcompress.GroupCompressBlock()
483
 
        block._z_content_chunks = (z_content,)
 
437
        block._z_content = z_content
484
438
        block._z_content_length = len(z_content)
485
439
        block._compressor_name = 'zlib'
486
440
        block._content_length = 158634
493
447
        self.assertIs(None, block._z_content_decompressor)
494
448
 
495
449
    def test__dump(self):
496
 
        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
497
 
        key_to_text = {(b'1',): dup_content + b'1 unique\n',
498
 
                       (b'2',): dup_content + b'2 extra special\n'}
 
450
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
 
451
        key_to_text = {('1',): dup_content + '1 unique\n',
 
452
                       ('2',): dup_content + '2 extra special\n'}
499
453
        locs, block = self.make_block(key_to_text)
500
 
        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
501
 
                          (b'd', 21, len(key_to_text[(b'2',)]),
502
 
                           [(b'c', 2, len(dup_content)),
503
 
                            (b'i', len(b'2 extra special\n'), b'')
504
 
                            ]),
505
 
                          ], block._dump())
 
454
        self.assertEqual([('f', len(key_to_text[('1',)])),
 
455
                          ('d', 21, len(key_to_text[('2',)]),
 
456
                           [('c', 2, len(dup_content)),
 
457
                            ('i', len('2 extra special\n'), '')
 
458
                           ]),
 
459
                         ], block._dump())
506
460
 
507
461
 
508
462
class TestCaseWithGroupCompressVersionedFiles(
513
467
        t = self.get_transport(dir)
514
468
        t.ensure_base()
515
469
        vf = groupcompress.make_pack_factory(graph=create_graph,
516
 
                                             delta=False, keylength=keylength,
517
 
                                             inconsistency_fatal=inconsistency_fatal)(t)
 
470
            delta=False, keylength=keylength,
 
471
            inconsistency_fatal=inconsistency_fatal)(t)
518
472
        if do_cleanup:
519
473
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
520
474
        return vf
533
487
 
534
488
    def make_g_index_missing_parent(self):
535
489
        graph_index = self.make_g_index('missing_parent', 1,
536
 
                                        [((b'parent', ), b'2 78 2 10', ([],)),
537
 
                                         ((b'tip', ), b'2 78 2 10',
538
 
                                            ([(b'parent', ), (b'missing-parent', )],)),
539
 
                                         ])
 
490
            [(('parent', ), '2 78 2 10', ([],)),
 
491
             (('tip', ), '2 78 2 10',
 
492
              ([('parent', ), ('missing-parent', )],)),
 
493
              ])
540
494
        return graph_index
541
495
 
542
496
    def test_get_record_stream_as_requested(self):
543
497
        # Consider promoting 'as-requested' to general availability, and
544
498
        # make this a VF interface test
545
499
        vf = self.make_test_vf(False, dir='source')
546
 
        vf.add_lines((b'a',), (), [b'lines\n'])
547
 
        vf.add_lines((b'b',), (), [b'lines\n'])
548
 
        vf.add_lines((b'c',), (), [b'lines\n'])
549
 
        vf.add_lines((b'd',), (), [b'lines\n'])
 
500
        vf.add_lines(('a',), (), ['lines\n'])
 
501
        vf.add_lines(('b',), (), ['lines\n'])
 
502
        vf.add_lines(('c',), (), ['lines\n'])
 
503
        vf.add_lines(('d',), (), ['lines\n'])
550
504
        vf.writer.end()
551
505
        keys = [record.key for record in vf.get_record_stream(
552
 
            [(b'a',), (b'b',), (b'c',), (b'd',)],
553
 
            'as-requested', False)]
554
 
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
 
506
                    [('a',), ('b',), ('c',), ('d',)],
 
507
                    'as-requested', False)]
 
508
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
555
509
        keys = [record.key for record in vf.get_record_stream(
556
 
            [(b'b',), (b'a',), (b'd',), (b'c',)],
557
 
            'as-requested', False)]
558
 
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
510
                    [('b',), ('a',), ('d',), ('c',)],
 
511
                    'as-requested', False)]
 
512
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
559
513
 
560
514
        # It should work even after being repacked into another VF
561
515
        vf2 = self.make_test_vf(False, dir='target')
562
516
        vf2.insert_record_stream(vf.get_record_stream(
563
 
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
 
517
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
564
518
        vf2.writer.end()
565
519
 
566
520
        keys = [record.key for record in vf2.get_record_stream(
567
 
            [(b'a',), (b'b',), (b'c',), (b'd',)],
568
 
            'as-requested', False)]
569
 
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
 
521
                    [('a',), ('b',), ('c',), ('d',)],
 
522
                    'as-requested', False)]
 
523
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
570
524
        keys = [record.key for record in vf2.get_record_stream(
571
 
            [(b'b',), (b'a',), (b'd',), (b'c',)],
572
 
            'as-requested', False)]
573
 
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
574
 
 
575
 
    def test_get_record_stream_max_bytes_to_index_default(self):
576
 
        vf = self.make_test_vf(True, dir='source')
577
 
        vf.add_lines((b'a',), (), [b'lines\n'])
578
 
        vf.writer.end()
579
 
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
580
 
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
581
 
                         record._manager._get_compressor_settings())
582
 
 
583
 
    def test_get_record_stream_accesses_compressor_settings(self):
584
 
        vf = self.make_test_vf(True, dir='source')
585
 
        vf.add_lines((b'a',), (), [b'lines\n'])
586
 
        vf.writer.end()
587
 
        vf._max_bytes_to_index = 1234
588
 
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
589
 
        self.assertEqual(dict(max_bytes_to_index=1234),
590
 
                         record._manager._get_compressor_settings())
591
 
 
592
 
    @staticmethod
593
 
    def grouped_stream(revision_ids, first_parents=()):
594
 
        parents = first_parents
595
 
        for revision_id in revision_ids:
596
 
            key = (revision_id,)
597
 
            record = versionedfile.FulltextContentFactory(
598
 
                key, parents, None,
599
 
                b'some content that is\n'
600
 
                b'identical except for\n'
601
 
                b'revision_id:%s\n' % (revision_id,))
602
 
            yield record
603
 
            parents = (key,)
 
525
                    [('b',), ('a',), ('d',), ('c',)],
 
526
                    'as-requested', False)]
 
527
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
604
528
 
605
529
    def test_insert_record_stream_reuses_blocks(self):
606
530
        vf = self.make_test_vf(True, dir='source')
 
531
        def grouped_stream(revision_ids, first_parents=()):
 
532
            parents = first_parents
 
533
            for revision_id in revision_ids:
 
534
                key = (revision_id,)
 
535
                record = versionedfile.FulltextContentFactory(
 
536
                    key, parents, None,
 
537
                    'some content that is\n'
 
538
                    'identical except for\n'
 
539
                    'revision_id:%s\n' % (revision_id,))
 
540
                yield record
 
541
                parents = (key,)
607
542
        # One group, a-d
608
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
543
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
609
544
        # Second group, e-h
610
 
        vf.insert_record_stream(self.grouped_stream(
611
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
545
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
546
                                               first_parents=(('d',),)))
612
547
        block_bytes = {}
613
 
        stream = vf.get_record_stream(
614
 
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
 
548
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
 
549
                                      'unordered', False)
615
550
        num_records = 0
616
551
        for record in stream:
617
 
            if record.key in [(b'a',), (b'e',)]:
 
552
            if record.key in [('a',), ('e',)]:
618
553
                self.assertEqual('groupcompress-block', record.storage_kind)
619
554
            else:
620
555
                self.assertEqual('groupcompress-block-ref',
623
558
            num_records += 1
624
559
        self.assertEqual(8, num_records)
625
560
        for r in 'abcd':
626
 
            key = (r.encode(),)
627
 
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
628
 
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
 
561
            key = (r,)
 
562
            self.assertIs(block_bytes[key], block_bytes[('a',)])
 
563
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
629
564
        for r in 'efgh':
630
 
            key = (r.encode(),)
631
 
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
632
 
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
 
565
            key = (r,)
 
566
            self.assertIs(block_bytes[key], block_bytes[('e',)])
 
567
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
633
568
        # Now copy the blocks into another vf, and ensure that the blocks are
634
569
        # preserved without creating new entries
635
570
        vf2 = self.make_test_vf(True, dir='target')
636
 
        keys = [(r.encode(),) for r in 'abcdefgh']
637
571
        # ordering in 'groupcompress' order, should actually swap the groups in
638
572
        # the target vf, but the groups themselves should not be disturbed.
639
 
 
640
573
        def small_size_stream():
641
 
            for record in vf.get_record_stream(keys, 'groupcompress', False):
 
574
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
 
575
                                               'groupcompress', False):
642
576
                record._manager._full_enough_block_size = \
643
577
                    record._manager._block._content_length
644
578
                yield record
645
 
 
 
579
                        
646
580
        vf2.insert_record_stream(small_size_stream())
647
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
581
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
582
                                       'groupcompress', False)
648
583
        vf2.writer.end()
649
584
        num_records = 0
650
585
        for record in stream:
655
590
 
656
591
    def test_insert_record_stream_packs_on_the_fly(self):
657
592
        vf = self.make_test_vf(True, dir='source')
 
593
        def grouped_stream(revision_ids, first_parents=()):
 
594
            parents = first_parents
 
595
            for revision_id in revision_ids:
 
596
                key = (revision_id,)
 
597
                record = versionedfile.FulltextContentFactory(
 
598
                    key, parents, None,
 
599
                    'some content that is\n'
 
600
                    'identical except for\n'
 
601
                    'revision_id:%s\n' % (revision_id,))
 
602
                yield record
 
603
                parents = (key,)
658
604
        # One group, a-d
659
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
605
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
660
606
        # Second group, e-h
661
 
        vf.insert_record_stream(self.grouped_stream(
662
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
607
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
608
                                               first_parents=(('d',),)))
663
609
        # Now copy the blocks into another vf, and see that the
664
610
        # insert_record_stream rebuilt a new block on-the-fly because of
665
611
        # under-utilization
666
612
        vf2 = self.make_test_vf(True, dir='target')
667
 
        keys = [(r.encode(),) for r in 'abcdefgh']
668
613
        vf2.insert_record_stream(vf.get_record_stream(
669
 
            keys, 'groupcompress', False))
670
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
614
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
 
615
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
616
                                       'groupcompress', False)
671
617
        vf2.writer.end()
672
618
        num_records = 0
673
619
        # All of the records should be recombined into a single block
682
628
 
683
629
    def test__insert_record_stream_no_reuse_block(self):
684
630
        vf = self.make_test_vf(True, dir='source')
 
631
        def grouped_stream(revision_ids, first_parents=()):
 
632
            parents = first_parents
 
633
            for revision_id in revision_ids:
 
634
                key = (revision_id,)
 
635
                record = versionedfile.FulltextContentFactory(
 
636
                    key, parents, None,
 
637
                    'some content that is\n'
 
638
                    'identical except for\n'
 
639
                    'revision_id:%s\n' % (revision_id,))
 
640
                yield record
 
641
                parents = (key,)
685
642
        # One group, a-d
686
 
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
 
643
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
687
644
        # Second group, e-h
688
 
        vf.insert_record_stream(self.grouped_stream(
689
 
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
 
645
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
646
                                               first_parents=(('d',),)))
690
647
        vf.writer.end()
691
 
        keys = [(r.encode(),) for r in 'abcdefgh']
692
 
        self.assertEqual(8, len(list(
693
 
            vf.get_record_stream(keys, 'unordered', False))))
 
648
        self.assertEqual(8, len(list(vf.get_record_stream(
 
649
                                        [(r,) for r in 'abcdefgh'],
 
650
                                        'unordered', False))))
694
651
        # Now copy the blocks into another vf, and ensure that the blocks are
695
652
        # preserved without creating new entries
696
653
        vf2 = self.make_test_vf(True, dir='target')
697
654
        # ordering in 'groupcompress' order, should actually swap the groups in
698
655
        # the target vf, but the groups themselves should not be disturbed.
699
656
        list(vf2._insert_record_stream(vf.get_record_stream(
700
 
            keys, 'groupcompress', False),
 
657
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
701
658
            reuse_blocks=False))
702
659
        vf2.writer.end()
703
660
        # After inserting with reuse_blocks=False, we should have everything in
704
661
        # a single new block.
705
 
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
 
662
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
663
                                       'groupcompress', False)
706
664
        block = None
707
665
        for record in stream:
708
666
            if block is None:
714
672
        unvalidated = self.make_g_index_missing_parent()
715
673
        combined = _mod_index.CombinedGraphIndex([unvalidated])
716
674
        index = groupcompress._GCGraphIndex(combined,
717
 
                                            is_locked=lambda: True, parents=True,
718
 
                                            track_external_parent_refs=True)
 
675
            is_locked=lambda: True, parents=True,
 
676
            track_external_parent_refs=True)
719
677
        index.scan_unvalidated_index(unvalidated)
720
678
        self.assertEqual(
721
 
            frozenset([(b'missing-parent',)]), index.get_missing_parents())
 
679
            frozenset([('missing-parent',)]), index.get_missing_parents())
722
680
 
723
681
    def test_track_external_parent_refs(self):
724
682
        g_index = self.make_g_index('empty', 1, [])
725
683
        mod_index = btree_index.BTreeBuilder(1, 1)
726
684
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
727
685
        index = groupcompress._GCGraphIndex(combined,
728
 
                                            is_locked=lambda: True, parents=True,
729
 
                                            add_callback=mod_index.add_nodes,
730
 
                                            track_external_parent_refs=True)
 
686
            is_locked=lambda: True, parents=True,
 
687
            add_callback=mod_index.add_nodes,
 
688
            track_external_parent_refs=True)
731
689
        index.add_records([
732
 
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
 
690
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
733
691
        self.assertEqual(
734
 
            frozenset([(b'parent-1',), (b'parent-2',)]),
 
692
            frozenset([('parent-1',), ('parent-2',)]),
735
693
            index.get_missing_parents())
736
694
 
737
695
    def make_source_with_b(self, a_parent, path):
738
696
        source = self.make_test_vf(True, dir=path)
739
 
        source.add_lines((b'a',), (), [b'lines\n'])
 
697
        source.add_lines(('a',), (), ['lines\n'])
740
698
        if a_parent:
741
 
            b_parents = ((b'a',),)
 
699
            b_parents = (('a',),)
742
700
        else:
743
701
            b_parents = ()
744
 
        source.add_lines((b'b',), b_parents, [b'lines\n'])
 
702
        source.add_lines(('b',), b_parents, ['lines\n'])
745
703
        return source
746
704
 
747
705
    def do_inconsistent_inserts(self, inconsistency_fatal):
748
706
        target = self.make_test_vf(True, dir='target',
749
707
                                   inconsistency_fatal=inconsistency_fatal)
750
708
        for x in range(2):
751
 
            source = self.make_source_with_b(x == 1, 'source%s' % x)
 
709
            source = self.make_source_with_b(x==1, 'source%s' % x)
752
710
            target.insert_record_stream(source.get_record_stream(
753
 
                [(b'b',)], 'unordered', False))
 
711
                [('b',)], 'unordered', False))
754
712
 
755
713
    def test_inconsistent_redundant_inserts_warn(self):
756
714
        """Should not insert a record that is already present."""
757
715
        warnings = []
758
 
 
759
716
        def warning(template, args):
760
717
            warnings.append(template % args)
761
718
        _trace_warning = trace.warning
764
721
            self.do_inconsistent_inserts(inconsistency_fatal=False)
765
722
        finally:
766
723
            trace.warning = _trace_warning
767
 
        self.assertContainsRe(
768
 
            "\n".join(warnings),
769
 
            r"^inconsistent details in skipped record: \(b?'b',\)"
770
 
            r" \(b?'42 32 0 8', \(\(\),\)\)"
771
 
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
 
724
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
 
725
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
 
726
                         warnings)
772
727
 
773
728
    def test_inconsistent_redundant_inserts_raises(self):
774
 
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
 
729
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
775
730
                              inconsistency_fatal=True)
776
 
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
777
 
                              r" in add_records:"
778
 
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
779
 
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
 
731
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
 
732
                              " in add_records:"
 
733
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
 
734
                              " 0 8', \(\(\('a',\),\),\)\)")
780
735
 
781
736
    def test_clear_cache(self):
782
737
        vf = self.make_source_with_b(True, 'source')
783
738
        vf.writer.end()
784
 
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
 
739
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
785
740
                                           True):
786
741
            pass
787
742
        self.assertTrue(len(vf._group_cache) > 0)
789
744
        self.assertEqual(0, len(vf._group_cache))
790
745
 
791
746
 
792
 
class TestGroupCompressConfig(tests.TestCaseWithTransport):
793
 
 
794
 
    def make_test_vf(self):
795
 
        t = self.get_transport('.')
796
 
        t.ensure_base()
797
 
        factory = groupcompress.make_pack_factory(graph=True,
798
 
                                                  delta=False, keylength=1, inconsistency_fatal=True)
799
 
        vf = factory(t)
800
 
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
801
 
        return vf
802
 
 
803
 
    def test_max_bytes_to_index_default(self):
804
 
        vf = self.make_test_vf()
805
 
        gc = vf._make_group_compressor()
806
 
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
807
 
                         vf._max_bytes_to_index)
808
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
809
 
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
810
 
                             gc._delta_index._max_bytes_to_index)
811
 
 
812
 
    def test_max_bytes_to_index_in_config(self):
813
 
        c = config.GlobalConfig()
814
 
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
815
 
        vf = self.make_test_vf()
816
 
        gc = vf._make_group_compressor()
817
 
        self.assertEqual(10000, vf._max_bytes_to_index)
818
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
819
 
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
820
 
 
821
 
    def test_max_bytes_to_index_bad_config(self):
822
 
        c = config.GlobalConfig()
823
 
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
824
 
        vf = self.make_test_vf()
825
 
        # TODO: This is triggering a warning, we might want to trap and make
826
 
        #       sure it is readable.
827
 
        gc = vf._make_group_compressor()
828
 
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
829
 
                         vf._max_bytes_to_index)
830
 
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
831
 
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
832
 
                             gc._delta_index._max_bytes_to_index)
833
 
 
834
747
 
835
748
class StubGCVF(object):
836
749
    def __init__(self, canned_get_blocks=None):
837
750
        self._group_cache = {}
838
751
        self._canned_get_blocks = canned_get_blocks or []
839
 
 
840
752
    def _get_blocks(self, read_memos):
841
753
        return iter(self._canned_get_blocks)
842
 
 
 
754
    
843
755
 
844
756
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
845
757
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
846
 
 
 
758
    
847
759
    def test_add_key_new_read_memo(self):
848
760
        """Adding a key with an uncached read_memo new to this batch adds that
849
761
        read_memo to the list of memos to fetch.
907
819
                (read_memo1, groupcompress.GroupCompressBlock()),
908
820
                (read_memo2, groupcompress.GroupCompressBlock())])
909
821
        locations = {
910
 
            ('key1',): (read_memo1 + (0, 0), None, None, None),
911
 
            ('key2',): (read_memo2 + (0, 0), None, None, None)}
 
822
            ('key1',): (read_memo1 + (None, None), None, None, None),
 
823
            ('key2',): (read_memo2 + (None, None), None, None, None)}
912
824
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
913
825
        batcher.add_key(('key1',))
914
826
        batcher.add_key(('key2',))
928
840
        gcvf = StubGCVF()
929
841
        gcvf._group_cache[read_memo] = fake_block
930
842
        locations = {
931
 
            ('key',): (read_memo + (0, 0), None, None, None)}
 
843
            ('key',): (read_memo + (None, None), None, None, None)}
932
844
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
933
845
        batcher.add_key(('key',))
934
846
        self.assertEqual([], list(batcher.yield_factories()))
[...]

class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
-        ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key4',): "this will be extracted\n"
-                   "but references most of its bytes from\n"
-                   "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+        (b'key1',): b"this is a text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key2',): b"another text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key3',): b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key4',): b"this will be extracted\n"
+        b"but references most of its bytes from\n"
+        b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(
+                key, [key_to_text[key]], len(key_to_text[key]), None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
-                    in compressor.labels_deltas.iteritems())
+                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
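The same round trip can be reproduced outside the test harness. A minimal sketch, assuming a current breezy where the module is importable as breezy.bzr.groupcompress; the keys and texts below are made-up example values, not part of the suite:

from breezy.bzr import groupcompress

texts = {
    (b'one',): b"some text\nwith a shared middle line\n",
    (b'two',): b"other text\nwith a shared middle line\n",
}
compressor = groupcompress.GroupCompressor()
for key in sorted(texts):
    # compress() appends each text to the group and records its extents.
    compressor.compress(key, [texts[key]], len(texts[key]), None)
locs = dict((key, (start, end)) for key, (start, _, end, _)
            in compressor.labels_deltas.items())
block = compressor.flush()
# The finished block survives a serialise/deserialise round trip.
restored = groupcompress.GroupCompressBlock.from_bytes(block.to_bytes())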
[...]

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key2',)], result_order)
+        self.assertEqual([(b'key1',), (b'key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key2',), ('key1',)], result_order)
+        self.assertEqual([(b'key2',), (b'key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)

[...]

        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
-        empty_z_header = zlib.compress('')
-        self.assertEqual('groupcompress-block\n'
-                         '8\n' # len(compress(''))
-                         '0\n' # len('')
-                         '%d\n'# compressed block len
-                         '%s'  # zheader
-                         '%s'  # block
+        empty_z_header = zlib.compress(b'')
+        self.assertEqual(b'groupcompress-block\n'
+                         b'8\n'  # len(compress(''))
+                         b'0\n'  # len('')
+                         b'%d\n'  # compressed block len
+                         b'%s'  # zheader
+                         b'%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

[...]

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
-         block_len, rest) = wire_bytes.split('\n', 4)
+         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
-        self.assertEqual('groupcompress-block', storage_kind)
+        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
-        entry1 = locations[('key1',)]
-        entry4 = locations[('key4',)]
-        self.assertEqualDiff('key1\n'
-                             '\n'  # no parents
-                             '%d\n' # start offset
-                             '%d\n' # end offset
-                             'key4\n'
-                             '\n'
-                             '%d\n'
-                             '%d\n'
+        entry1 = locations[(b'key1',)]
+        entry4 = locations[(b'key4',)]
+        self.assertEqualDiff(b'key1\n'
+                             b'\n'  # no parents
+                             b'%d\n'  # start offset
+                             b'%d\n'  # end offset
+                             b'key4\n'
+                             b'\n'
+                             b'%d\n'
+                             b'%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
-                            header)
+                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
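The framing asserted above can be read back with a few lines of standard-library code. A rough sketch; the helper name is invented for illustration, and it relies only on the layout the test checks (a 'groupcompress-block' label line, three decimal lengths, the zlib-compressed header, then the raw block bytes):

import zlib

def parse_groupcompress_block_wire(wire_bytes):
    """Split the 'groupcompress-block' wire format the way the test does."""
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split(b'\n', 4)
    assert storage_kind == b'groupcompress-block'
    z_header_len = int(z_header_len)
    block_len = int(block_len)
    # The zlib-compressed header comes first, then the raw block bytes.
    header = zlib.decompress(rest[:z_header_len])
    block_bytes = rest[z_header_len:]
    assert int(header_len) == len(header)
    assert block_len == len(block_bytes)
    return header, block_bytes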

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
-        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))

[...]

            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key4',)], result_order)
+        self.assertEqual([(b'key1',), (b'key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)

[...]

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
-            self.assertEqual(('key1',), record.key)
+            self.assertEqual((b'key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

[...]

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
-            self.assertEqual(('key4',), record.key)
+            self.assertEqual((b'key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

+    def test_manager_default_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate _max_bytes_to_index
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
+                         manager._get_compressor_settings())

+    def test_manager_custom_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        called = []

+        def compressor_settings():
+            called.append('called')
+            return (10,)
+        manager = groupcompress._LazyGroupContentManager(old_block,
+                                                         get_compressor_settings=compressor_settings)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate compressor_settings
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._compressor_settings)
+        # Only called 1 time
+        self.assertEqual(['called'], called)

+    def test__rebuild_handles_compressor_settings(self):
+        if not isinstance(groupcompress.GroupCompressor,
+                          groupcompress.PyrexGroupCompressor):
+            raise tests.TestNotApplicable('pure-python compressor'
+                                          ' does not handle compressor_settings')
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block,
+                                                         get_compressor_settings=lambda: dict(max_bytes_to_index=32))
+        gc = manager._make_group_compressor()
+        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
+        self.add_key_to_manager((b'key3',), locations, old_block, manager)
+        self.add_key_to_manager((b'key4',), locations, old_block, manager)
+        action, last_byte, total_bytes = manager._check_rebuild_action()
+        self.assertEqual('rebuild', action)
+        manager._rebuild_block()
+        new_block = manager._block
+        self.assertIsNot(old_block, new_block)
+        # Because of the new max_bytes_to_index, we do a poor job of
+        # rebuilding. This is a side-effect of the change, but at least it does
+        # show the setting had an effect.
+        self.assertTrue(old_block._content_length < new_block._content_length)
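The settings hook those three tests exercise can also be seen in isolation. A minimal sketch, assuming a current breezy; the single-text block and the (10,) settings value are arbitrary stand-ins:

from breezy.bzr import groupcompress

compressor = groupcompress.GroupCompressor()
text = b"enough text to make a small demo block\n" * 4
compressor.compress((b'demo',), [text], len(text), None)
block = groupcompress.GroupCompressBlock.from_bytes(
    compressor.flush().to_bytes())

calls = []

def settings():
    calls.append('called')
    return (10,)

manager = groupcompress._LazyGroupContentManager(
    block, get_compressor_settings=settings)
# The callable is not consulted at construction time...
assert manager._compressor_settings is None
# ...and is consulted at most once; the result is then cached.
manager._get_compressor_settings()
manager._get_compressor_settings()
assert calls == ['called']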

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())

[...]

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
+        f1k1 = (b'f1', b'k1')
+        f1k2 = (b'f1', b'k2')
+        f2k1 = (b'f2', b'k1')
+        f2k2 = (b'f2', b'k2')
+        texts[f1k1] = self._texts[(b'key1',)]
+        texts[f1k2] = self._texts[(b'key2',)]
+        texts[f2k1] = self._texts[(b'key3',)]
+        texts[f2k2] = self._texts[(b'key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length

[...]

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


+class Test_GCBuildDetails(tests.TestCase):

+    def test_acts_like_tuple(self):
+        # _GCBuildDetails inlines some of the data that used to be spread out
+        # across a bunch of tuples
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+                                           ('INDEX', 10, 20, 0, 5))
+        self.assertEqual(4, len(bd))
+        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
+        self.assertEqual(None, bd[1])  # Compression Parent is always None
+        self.assertEqual((('parent1',), ('parent2',)), bd[2])
+        self.assertEqual(('group', None), bd[3])  # Record details

+    def test__repr__(self):
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+                                           ('INDEX', 10, 20, 0, 5))
+        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
+                         " (('parent1',), ('parent2',)))",
+                         repr(bd))
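The point of test_acts_like_tuple is that _GCBuildDetails can be read positionally exactly like the (index_memo, compression_parent, parents, record_details) tuple it replaces. A small sketch of that equivalence, reusing the test's own example values and assuming a current breezy:

from breezy.bzr import groupcompress

bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                   ('INDEX', 10, 20, 0, 5))
# The four positions mirror the old build-details tuple.
assert len(bd) == 4
assert bd[0] == ('INDEX', 10, 20, 0, 5)       # index memo
assert bd[1] is None                          # compression parent (always None)
assert bd[2] == (('parent1',), ('parent2',))  # parent keys
assert bd[3] == ('group', None)               # record details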