/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/tests/test_groupcompress.py

  • Committer: Jelmer Vernooij
  • Date: 2020-04-05 19:11:34 UTC
  • mto: (7490.7.16 work)
  • mto: This revision was merged to the branch mainline in revision 7501.
  • Revision ID: jelmer@jelmer.uk-20200405191134-0aebh8ikiwygxma5
Populate the .gitignore file.

The changes are shown below as a unified diff: lines prefixed with '-' are removed (old bzrlib-era code), lines prefixed with '+' are added (new breezy code), and each hunk header gives the old and new starting line and line count.
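This revision is part of the Python 3 / breezy port of the old bzrlib test suite: imports move from bzrlib to the breezy packages, keys and literals become bytes, and GroupCompressor.compress() is called with a list of byte chunks plus their total length instead of a single string. As a rough, illustrative sketch of the call shape the updated tests exercise (not part of the diff itself):

    from breezy.bzr import groupcompress

    compressor = groupcompress.PythonGroupCompressor()
    text = b'strange\ncommon\n'
    # New-style call: key, list of byte chunks, total length, expected sha1 (None here);
    # the old bzrlib API took a single string: compress(('label',), 'strange\ncommon\n', None)
    sha1, start_point, end_point, kind = compressor.compress(
        ('label',), [text], len(text), None)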
@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008-2011 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,37 +18,42 @@
 
 import zlib
 
-from bzrlib import (
-    btree_index,
-    groupcompress,
+from .. import (
+    config,
     errors,
-    index as _mod_index,
     osutils,
     tests,
     trace,
+    )
+from ..bzr import (
+    btree_index,
+    groupcompress,
+    knit,
+    index as _mod_index,
     versionedfile,
     )
-from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
-
-
-def load_tests(standard_tests, module, loader):
-    """Parameterize tests for all versions of groupcompress."""
-    to_adapt, result = tests.split_suite_by_condition(
-        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
+from ..osutils import sha_string
+from .test__groupcompress import compiled_groupcompress_feature
+from .scenarios import load_tests_apply_scenarios
+
+
+def group_compress_implementation_scenarios():
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
     if compiled_groupcompress_feature.available():
         scenarios.append(('C',
-            {'compressor': groupcompress.PyrexGroupCompressor}))
-    return tests.multiply_tests(to_adapt, scenarios, result)
+                          {'compressor': groupcompress.PyrexGroupCompressor}))
+    return scenarios
+
+
+load_tests = load_tests_apply_scenarios
 
 
 class TestGroupCompressor(tests.TestCase):
 
     def _chunks_to_repr_lines(self, chunks):
-        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
+        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))
 
     def assertEqualDiffEncoded(self, expected, actual):
         """Compare the actual content to the expected content.
@@ -66,7 +71,8 @@
 class TestAllGroupCompressors(TestGroupCompressor):
     """Tests for GroupCompressor"""
 
-    compressor = None # Set by multiply_tests
+    scenarios = group_compress_implementation_scenarios()
+    compressor = None  # Set by scenario
 
     def test_empty_delta(self):
         compressor = self.compressor()
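The two hunks above also switch the module from a hand-rolled load_tests()/multiply_tests() helper to breezy's scenario machinery: the test class grows a scenarios attribute and the module sets load_tests = load_tests_apply_scenarios. A minimal, illustrative sketch of that pattern (names taken from the diff; the string values stand in for the real compressor classes):

    from breezy import tests
    from breezy.tests.scenarios import load_tests_apply_scenarios

    load_tests = load_tests_apply_scenarios

    class TestAllGroupCompressors(tests.TestCase):
        # One copy of every test method runs per scenario; the attributes in
        # each scenario dict (here 'compressor') are set on the test instance.
        scenarios = [
            ('python', {'compressor': 'PythonGroupCompressor'}),
            ('C', {'compressor': 'PyrexGroupCompressor'}),
            ]
        compressor = None  # Set by scenario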
@@ -75,19 +81,20 @@
     def test_one_nosha_delta(self):
         # diff against NUKK
         compressor = self.compressor()
-        sha1, start_point, end_point, _ = compressor.compress(('label',),
-            'strange\ncommon\n', None)
-        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
-        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
-        self.assertEqual(expected_lines, ''.join(compressor.chunks))
+        text = b'strange\ncommon\n'
+        sha1, start_point, end_point, _ = compressor.compress(
+            ('label',), [text], len(text), None)
+        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
+        expected_lines = b'f\x0fstrange\ncommon\n'
+        self.assertEqual(expected_lines, b''.join(compressor.chunks))
         self.assertEqual(0, start_point)
-        self.assertEqual(sum(map(len, expected_lines)), end_point)
+        self.assertEqual(len(expected_lines), end_point)
 
     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
-        sha1, start_point, end_point, kind = compressor.compress(('empty',),
-                                                                 '', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty',), [], 0, None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -95,10 +102,11 @@
         self.assertEqual(0, compressor.endpoint)
         self.assertEqual([], compressor.chunks)
         # Even after adding some content
-        compressor.compress(('content',), 'some\nbytes\n', None)
+        text = b'some\nbytes\n'
+        compressor.compress(('content',), [text], len(text), None)
         self.assertTrue(compressor.endpoint > 0)
-        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-                                                                 '', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty2',), [], 0, None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -108,27 +116,31 @@
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, _, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         # get the first out
-        self.assertEqual(('strange\ncommon long line\n'
-                          'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual(([b'strange\ncommon long line\n'
+                           b'that needs a 16 byte match\n'], sha1_1),
                          compressor.extract(('label',)))
         # and the second
-        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
-                          'different\n', sha1_2),
+        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
+                           b'different\n'], sha1_2),
                          compressor.extract(('newlabel',)))
 
     def test_pop_last(self):
         compressor = self.compressor()
-        _, _, _, _ = compressor.compress(('key1',),
-            'some text\nfor the first entry\n', None)
+        text = b'some text\nfor the first entry\n'
+        _, _, _, _ = compressor.compress(
+            ('key1',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        _, _, _, _ = compressor.compress(('key2',),
-            'some text\nfor the second entry\n', None)
+        text = b'some text\nfor the second entry\n'
+        _, _, _, _ = compressor.compress(
+            ('key2',), [text], len(text), None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)
 
@@ -140,42 +152,47 @@
 
     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
-        compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
-        compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+        chunks = [b'strange\n',
+                  b'common very very long line\n',
+                  b'plus more text\n']
+        compressor.compress(
+            ('label',), chunks, sum(map(len, chunks)), None)
+        chunks = [
+            b'common very very long line\n',
+            b'plus more text\n',
+            b'different\n',
+            b'moredifferent\n']
+        compressor.compress(
+            ('newlabel',),
+            chunks, sum(map(len, chunks)), None)
+        chunks = [
+            b'new\n',
+            b'common very very long line\n',
+            b'plus more text\n',
+            b'different\n',
+            b'moredifferent\n']
+        compressor.compress(
+            ('label3',), chunks, sum(map(len, chunks)), None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # source and target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n',  # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -184,30 +201,29 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+        text = b'strange\ncommon very very long line\nwith some extra text\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
+        text = b'different\nmoredifferent\nand then some more\n'
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
-            None)
-        self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
-            sha1_3)
+        text = (b'new\ncommon very very long line\nwith some extra text\n'
+                b'different\nmoredifferent\nand then some more\n')
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0b',
+            b'd\x0b',
             # source and target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x03new',
+            b'\x03new',
             # Copy of first parent 'common' range
-            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
+            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -219,42 +235,48 @@
 
     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
-        compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
-        compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+        chunks = [b'strange\n',
+                  b'common very very long line\n',
+                  b'plus more text\n']
+        compressor.compress(
+            ('label',), chunks, sum(map(len, chunks)), None)
+        chunks = [
+            b'common very very long line\n',
+            b'plus more text\n',
+            b'different\n',
+            b'moredifferent\n']
+        compressor.compress(
+            ('newlabel',), chunks, sum(map(len, chunks)), None)
+        chunks = [
+            b'new\n',
+            b'common very very long line\n',
+            b'plus more text\n',
+            b'different\n',
+            b'moredifferent\n']
+        compressor.compress(
+            ('label3',),
+            chunks, sum(map(len, chunks)), None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n',  # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -263,30 +285,29 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+        text = b'strange\ncommon very very long line\nwith some extra text\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
+        text = b'different\nmoredifferent\nand then some more\n'
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
-            None)
-        self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
-            sha1_3)
+        text = (b'new\ncommon very very long line\nwith some extra text\n'
+                b'different\nmoredifferent\nand then some more\n')
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0c',
+            b'd\x0c',
             # target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x04new\n',
+            b'\x04new\n',
             # Copy of first parent 'common' range
-            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
+            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -299,9 +320,10 @@
         compressor = groupcompress.GroupCompressor()
         start = 0
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(
+                key, [key_to_text[key]], len(key_to_text[key]), None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
-                    in compressor.labels_deltas.iteritems())
+                    in compressor.labels_deltas.items())
         block = compressor.flush()
         raw_bytes = block.to_bytes()
         # Go through from_bytes(to_bytes()) so that we start with a compressed
@@ -310,32 +332,32 @@
 
     def test_from_empty_bytes(self):
         self.assertRaises(ValueError,
-                          groupcompress.GroupCompressBlock.from_bytes, '')
+                          groupcompress.GroupCompressBlock.from_bytes, b'')
 
     def test_from_minimal_bytes(self):
         block = groupcompress.GroupCompressBlock.from_bytes(
-            'gcb1z\n0\n0\n')
+            b'gcb1z\n0\n0\n')
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertIs(None, block._content)
-        self.assertEqual('', block._z_content)
+        self.assertEqual(b'', block._z_content)
         block._ensure_content()
-        self.assertEqual('', block._content)
-        self.assertEqual('', block._z_content)
-        block._ensure_content() # Ensure content is safe to call 2x
+        self.assertEqual(b'', block._content)
+        self.assertEqual(b'', block._z_content)
+        block._ensure_content()  # Ensure content is safe to call 2x
 
     def test_from_invalid(self):
         self.assertRaises(ValueError,
                           groupcompress.GroupCompressBlock.from_bytes,
-                          'this is not a valid header')
+                          b'this is not a valid header')
 
     def test_from_bytes(self):
-        content = ('a tiny bit of content\n')
+        content = (b'a tiny bit of content\n')
         z_content = zlib.compress(content)
         z_bytes = (
-            'gcb1z\n' # group compress block v1 plain
-            '%d\n' # Length of compressed content
-            '%d\n' # Length of uncompressed content
-            '%s'   # Compressed content
+            b'gcb1z\n'  # group compress block v1 plain
+            b'%d\n'  # Length of compressed content
+            b'%d\n'  # Length of uncompressed content
+            b'%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
             z_bytes)
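For reference, the raw block layout these tests poke at is visible in the literals above: a GroupCompressBlock serialises as a b'gcb1z\n' header, the compressed and uncompressed lengths as decimal ASCII lines, then the zlib-compressed payload. An illustrative, self-contained reconstruction of the bytes built in test_from_bytes (a sketch, not part of the diff):

    import zlib

    content = b'a tiny bit of content\n'
    z_content = zlib.compress(content)
    z_bytes = (
        b'gcb1z\n'   # group compress block v1, zlib-compressed
        b'%d\n'      # length of the compressed content
        b'%d\n'      # length of the uncompressed content
        b'%s'        # the compressed content itself
        ) % (len(z_content), len(content), z_content)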
@@ -347,31 +369,55 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)
 
+    def test_to_chunks(self):
+        content_chunks = [b'this is some content\n',
+                          b'this content will be compressed\n']
+        content_len = sum(map(len, content_chunks))
+        content = b''.join(content_chunks)
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.set_chunked_content(content_chunks, content_len)
+        total_len, block_chunks = gcb.to_chunks()
+        block_bytes = b''.join(block_chunks)
+        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+        self.assertEqual(total_len, len(block_bytes))
+        self.assertEqual(gcb._content_length, content_len)
+        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
+                           b'%d\n'  # Length of compressed content
+                           b'%d\n'  # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        # The first chunk should be the header chunk. It is small, fixed size,
+        # and there is no compelling reason to split it up
+        self.assertEqual(expected_header, block_chunks[0])
+        self.assertStartsWith(block_bytes, expected_header)
+        remaining_bytes = block_bytes[len(expected_header):]
+        raw_bytes = zlib.decompress(remaining_bytes)
+        self.assertEqual(content, raw_bytes)
+
     def test_to_bytes(self):
-        content = ('this is some content\n'
-                   'this content will be compressed\n')
+        content = (b'this is some content\n'
+                   b'this content will be compressed\n')
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_content(content)
-        bytes = gcb.to_bytes()
+        data = gcb.to_bytes()
         self.assertEqual(gcb._z_content_length, len(gcb._z_content))
         self.assertEqual(gcb._content_length, len(content))
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
-                         ) % (gcb._z_content_length, gcb._content_length)
-        self.assertStartsWith(bytes, expected_header)
-        remaining_bytes = bytes[len(expected_header):]
+        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
+                           b'%d\n'  # Length of compressed content
+                           b'%d\n'  # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        self.assertStartsWith(data, expected_header)
+        remaining_bytes = data[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)
 
         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-                                 len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
+        gcb.set_chunked_content([b'this is some content\n'
+                                 b'this content will be compressed\n'],
+                                len(content))
+        old_data = data
+        data = gcb.to_bytes()
+        self.assertEqual(old_data, data)
 
     def test_partial_decomp(self):
         content_chunks = []
@@ -379,17 +425,17 @@
         # partial decompression to work with. Most auto-generated data
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
-        for i in xrange(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+        for i in range(2048):
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
             next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -424,17 +470,17 @@
         # partial decompression to work with. Most auto-generated data
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
-        for i in xrange(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+        for i in range(2048):
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -447,16 +493,16 @@
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
-        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
-        key_to_text = {('1',): dup_content + '1 unique\n',
-                       ('2',): dup_content + '2 extra special\n'}
+        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
+        key_to_text = {(b'1',): dup_content + b'1 unique\n',
+                       (b'2',): dup_content + b'2 extra special\n'}
         locs, block = self.make_block(key_to_text)
-        self.assertEqual([('f', len(key_to_text[('1',)])),
-                          ('d', 21, len(key_to_text[('2',)]),
-                           [('c', 2, len(dup_content)),
-                            ('i', len('2 extra special\n'), '')
-                           ]),
-                         ], block._dump())
+        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
+                          (b'd', 21, len(key_to_text[(b'2',)]),
+                           [(b'c', 2, len(dup_content)),
+                            (b'i', len(b'2 extra special\n'), b'')
+                            ]),
+                          ], block._dump())
 
 
 class TestCaseWithGroupCompressVersionedFiles(
@@ -467,8 +513,8 @@
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength,
-            inconsistency_fatal=inconsistency_fatal)(t)
+                                             delta=False, keylength=keylength,
+                                             inconsistency_fatal=inconsistency_fatal)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -487,69 +533,88 @@
 
     def make_g_index_missing_parent(self):
         graph_index = self.make_g_index('missing_parent', 1,
-            [(('parent', ), '2 78 2 10', ([],)),
-             (('tip', ), '2 78 2 10',
-              ([('parent', ), ('missing-parent', )],)),
-              ])
+                                        [((b'parent', ), b'2 78 2 10', ([],)),
+                                         ((b'tip', ), b'2 78 2 10',
+                                            ([(b'parent', ), (b'missing-parent', )],)),
+                                         ])
         return graph_index
 
     def test_get_record_stream_as_requested(self):
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
         vf = self.make_test_vf(False, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.add_lines(('b',), (), ['lines\n'])
-        vf.add_lines(('c',), (), ['lines\n'])
-        vf.add_lines(('d',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.add_lines((b'b',), (), [b'lines\n'])
+        vf.add_lines((b'c',), (), [b'lines\n'])
+        vf.add_lines((b'd',), (), [b'lines\n'])
         vf.writer.end()
         keys = [record.key for record in vf.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
-                    'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
-                    'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
         # It should work even after being repacked into another VF
         vf2 = self.make_test_vf(False, dir='target')
         vf2.insert_record_stream(vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
+            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
         vf2.writer.end()
 
         keys = [record.key for record in vf2.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
-                    'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf2.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
-                    'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
+
+    def test_get_record_stream_max_bytes_to_index_default(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.writer.end()
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
+                         record._manager._get_compressor_settings())
+
+    def test_get_record_stream_accesses_compressor_settings(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.writer.end()
+        vf._max_bytes_to_index = 1234
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(dict(max_bytes_to_index=1234),
+                         record._manager._get_compressor_settings())
+
+    @staticmethod
+    def grouped_stream(revision_ids, first_parents=()):
+        parents = first_parents
+        for revision_id in revision_ids:
+            key = (revision_id,)
+            record = versionedfile.FulltextContentFactory(
+                key, parents, None,
+                b'some content that is\n'
+                b'identical except for\n'
+                b'revision_id:%s\n' % (revision_id,))
+            yield record
+            parents = (key,)
 
     def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         block_bytes = {}
-        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                      'unordered', False)
+        stream = vf.get_record_stream(
+            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
         num_records = 0
         for record in stream:
-            if record.key in [('a',), ('e',)]:
+            if record.key in [(b'a',), (b'e',)]:
                 self.assertEqual('groupcompress-block', record.storage_kind)
             else:
                 self.assertEqual('groupcompress-block-ref',
@@ -558,28 +623,28 @@
             num_records += 1
         self.assertEqual(8, num_records)
         for r in 'abcd':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('a',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
         for r in 'efgh':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('e',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
+
         def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
+            for record in vf.get_record_stream(keys, 'groupcompress', False):
                 record._manager._full_enough_block_size = \
                     record._manager._block._content_length
                 yield record
-
+
         vf2.insert_record_stream(small_size_stream())
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         for record in stream:
@@ -590,30 +655,19 @@
 
     def test_insert_record_stream_packs_on_the_fly(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         # Now copy the blocks into another vf, and see that the
         # insert_record_stream rebuilt a new block on-the-fly because of
         # under-utilization
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+            keys, 'groupcompress', False))
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         # All of the records should be recombined into a single block
@@ -628,39 +682,27 @@
 
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         vf.writer.end()
-        self.assertEqual(8, len(list(vf.get_record_stream(
-                                        [(r,) for r in 'abcdefgh'],
-                                        'unordered', False))))
+        keys = [(r.encode(),) for r in 'abcdefgh']
+        self.assertEqual(8, len(list(
+            vf.get_record_stream(keys, 'unordered', False))))
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         list(vf2._insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
+            keys, 'groupcompress', False),
             reuse_blocks=False))
         vf2.writer.end()
         # After inserting with reuse_blocks=False, we should have everything in
         # a single new block.
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         block = None
         for record in stream:
             if block is None:
@@ -672,47 +714,48 @@
         unvalidated = self.make_g_index_missing_parent()
         combined = _mod_index.CombinedGraphIndex([unvalidated])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            track_external_parent_refs=True)
         index.scan_unvalidated_index(unvalidated)
         self.assertEqual(
-            frozenset([('missing-parent',)]), index.get_missing_parents())
+            frozenset([(b'missing-parent',)]), index.get_missing_parents())
 
     def test_track_external_parent_refs(self):
         g_index = self.make_g_index('empty', 1, [])
         mod_index = btree_index.BTreeBuilder(1, 1)
         combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            add_callback=mod_index.add_nodes,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            add_callback=mod_index.add_nodes,
+                                            track_external_parent_refs=True)
         index.add_records([
-            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
+            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
         self.assertEqual(
-            frozenset([('parent-1',), ('parent-2',)]),
+            frozenset([(b'parent-1',), (b'parent-2',)]),
             index.get_missing_parents())
 
     def make_source_with_b(self, a_parent, path):
         source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
+        source.add_lines((b'a',), (), [b'lines\n'])
         if a_parent:
-            b_parents = (('a',),)
+            b_parents = ((b'a',),)
         else:
             b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
+        source.add_lines((b'b',), b_parents, [b'lines\n'])
         return source
 
     def do_inconsistent_inserts(self, inconsistency_fatal):
         target = self.make_test_vf(True, dir='target',
                                    inconsistency_fatal=inconsistency_fatal)
         for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
+            source = self.make_source_with_b(x == 1, 'source%s' % x)
             target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
+                [(b'b',)], 'unordered', False))
 
     def test_inconsistent_redundant_inserts_warn(self):
         """Should not insert a record that is already present."""
         warnings = []
+
         def warning(template, args):
             warnings.append(template % args)
         _trace_warning = trace.warning
@@ -721,22 +764,24 @@
             self.do_inconsistent_inserts(inconsistency_fatal=False)
         finally:
             trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
+        self.assertContainsRe(
+            "\n".join(warnings),
+            r"^inconsistent details in skipped record: \(b?'b',\)"
+            r" \(b?'42 32 0 8', \(\(\),\)\)"
+            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
 
     def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
+        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                               inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)"
+        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
+                              r" in add_records:"
+                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
+                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
 
     def test_clear_cache(self):
         vf = self.make_source_with_b(True, 'source')
         vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                            True):
             pass
         self.assertTrue(len(vf._group_cache) > 0)
@@ -744,18 +789,61 @@
         self.assertEqual(0, len(vf._group_cache))
 
 
+class TestGroupCompressConfig(tests.TestCaseWithTransport):
+
+    def make_test_vf(self):
+        t = self.get_transport('.')
+        t.ensure_base()
+        factory = groupcompress.make_pack_factory(graph=True,
+                                                  delta=False, keylength=1, inconsistency_fatal=True)
+        vf = factory(t)
+        self.addCleanup(groupcompress.cleanup_pack_group, vf)
+        return vf
+
+    def test_max_bytes_to_index_default(self):
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_in_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(10000, vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_bad_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
+        vf = self.make_test_vf()
+        # TODO: This is triggering a warning, we might want to trap and make
+        #       sure it is readable.
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
 
 class StubGCVF(object):
     def __init__(self, canned_get_blocks=None):
         self._group_cache = {}
         self._canned_get_blocks = canned_get_blocks or []
+
     def _get_blocks(self, read_memos):
         return iter(self._canned_get_blocks)
-
+
 
 class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
     """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
+
     def test_add_key_new_read_memo(self):
         """Adding a key with an uncached read_memo new to this batch adds that
         read_memo to the list of memos to fetch.
@@ -819,8 +907,8 @@
                 (read_memo1, groupcompress.GroupCompressBlock()),
                 (read_memo2, groupcompress.GroupCompressBlock())])
         locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
+            ('key1',): (read_memo1 + (0, 0), None, None, None),
+            ('key2',): (read_memo2 + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key1',))
         batcher.add_key(('key2',))
@@ -840,7 +928,7 @@
         gcvf = StubGCVF()
         gcvf._group_cache[read_memo] = fake_block
         locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
+            ('key',): (read_memo + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key',))
         self.assertEqual([], list(batcher.yield_factories()))
@@ -853,29 +941,31 @@
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
-        ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key4',): "this will be extracted\n"
-                   "but references most of its bytes from\n"
-                   "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+        (b'key1',): b"this is a text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key2',): b"another text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key3',): b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key4',): b"this will be extracted\n"
+        b"but references most of its bytes from\n"
+        b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
     }
+
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
         compressor = groupcompress.GroupCompressor()
         start = 0
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(
+                key, [key_to_text[key]], len(key_to_text[key]), None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
-                    in compressor.labels_deltas.iteritems())
+                    in compressor.labels_deltas.items())
         block = compressor.flush()
         raw_bytes = block.to_bytes()
         return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
894
984
    def test_get_fulltexts(self):
895
985
        locations, block = self.make_block(self._texts)
896
986
        manager = groupcompress._LazyGroupContentManager(block)
897
 
        self.add_key_to_manager(('key1',), locations, block, manager)
898
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
987
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
988
        self.add_key_to_manager((b'key2',), locations, block, manager)
899
989
        result_order = []
900
990
        for record in manager.get_record_stream():
901
991
            result_order.append(record.key)
902
992
            text = self._texts[record.key]
903
993
            self.assertEqual(text, record.get_bytes_as('fulltext'))
904
 
        self.assertEqual([('key1',), ('key2',)], result_order)
 
994
        self.assertEqual([(b'key1',), (b'key2',)], result_order)
905
995
 
906
996
        # If we build the manager in the opposite order, we should get them
907
997
        # back in the opposite order
908
998
        manager = groupcompress._LazyGroupContentManager(block)
909
 
        self.add_key_to_manager(('key2',), locations, block, manager)
910
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
999
        self.add_key_to_manager((b'key2',), locations, block, manager)
 
1000
        self.add_key_to_manager((b'key1',), locations, block, manager)
911
1001
        result_order = []
912
1002
        for record in manager.get_record_stream():
913
1003
            result_order.append(record.key)
914
1004
            text = self._texts[record.key]
915
1005
            self.assertEqual(text, record.get_bytes_as('fulltext'))
916
 
        self.assertEqual([('key2',), ('key1',)], result_order)
 
1006
        self.assertEqual([(b'key2',), (b'key1',)], result_order)
917
1007
 
918
1008
    def test__wire_bytes_no_keys(self):
919
1009
        locations, block = self.make_block(self._texts)
923
1013
        # We should have triggered a strip, since we aren't using any content
924
1014
        stripped_block = manager._block.to_bytes()
925
1015
        self.assertTrue(block_length > len(stripped_block))
926
 
        empty_z_header = zlib.compress('')
927
 
        self.assertEqual('groupcompress-block\n'
928
 
                         '8\n' # len(compress(''))
929
 
                         '0\n' # len('')
930
 
                         '%d\n'# compressed block len
931
 
                         '%s'  # zheader
932
 
                         '%s'  # block
 
1016
        empty_z_header = zlib.compress(b'')
 
1017
        self.assertEqual(b'groupcompress-block\n'
 
1018
                         b'8\n'  # len(compress(''))
 
1019
                         b'0\n'  # len('')
 
1020
                         b'%d\n'  # compressed block len
 
1021
                         b'%s'  # zheader
 
1022
                         b'%s'  # block
933
1023
                         % (len(stripped_block), empty_z_header,
934
1024
                            stripped_block),
935
1025
                         wire_bytes)
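
The assertions above pin down the 'groupcompress-block' wire layout: a
storage-kind line, three decimal length lines (compressed header,
uncompressed header, block), then the zlib-compressed header followed by the
raw block bytes. A minimal sketch of that layout, using an illustrative
helper name that is not part of breezy's API:

    import zlib

    def wire_header_sketch(header, block_bytes):
        # Mirrors the layout checked above: storage kind, compressed header
        # length, uncompressed header length, block length, then the payloads.
        z_header = zlib.compress(header)
        return (b'groupcompress-block\n'
                + b'%d\n' % len(z_header)
                + b'%d\n' % len(header)
                + b'%d\n' % len(block_bytes)
                + z_header
                + block_bytes)

    # With header=b'', zlib.compress(b'') is 8 bytes, so the three length
    # lines read 8, 0 and len(block_bytes), as the empty-header test asserts.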
937
1027
    def test__wire_bytes(self):
938
1028
        locations, block = self.make_block(self._texts)
939
1029
        manager = groupcompress._LazyGroupContentManager(block)
940
 
        self.add_key_to_manager(('key1',), locations, block, manager)
941
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1030
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1031
        self.add_key_to_manager((b'key4',), locations, block, manager)
942
1032
        block_bytes = block.to_bytes()
943
1033
        wire_bytes = manager._wire_bytes()
944
1034
        (storage_kind, z_header_len, header_len,
945
 
         block_len, rest) = wire_bytes.split('\n', 4)
 
1035
         block_len, rest) = wire_bytes.split(b'\n', 4)
946
1036
        z_header_len = int(z_header_len)
947
1037
        header_len = int(header_len)
948
1038
        block_len = int(block_len)
949
 
        self.assertEqual('groupcompress-block', storage_kind)
 
1039
        self.assertEqual(b'groupcompress-block', storage_kind)
950
1040
        self.assertEqual(34, z_header_len)
951
1041
        self.assertEqual(26, header_len)
952
1042
        self.assertEqual(len(block_bytes), block_len)
953
1043
        z_header = rest[:z_header_len]
954
1044
        header = zlib.decompress(z_header)
955
1045
        self.assertEqual(header_len, len(header))
956
 
        entry1 = locations[('key1',)]
957
 
        entry4 = locations[('key4',)]
958
 
        self.assertEqualDiff('key1\n'
959
 
                             '\n'  # no parents
960
 
                             '%d\n' # start offset
961
 
                             '%d\n' # end offset
962
 
                             'key4\n'
963
 
                             '\n'
964
 
                             '%d\n'
965
 
                             '%d\n'
 
1046
        entry1 = locations[(b'key1',)]
 
1047
        entry4 = locations[(b'key4',)]
 
1048
        self.assertEqualDiff(b'key1\n'
 
1049
                             b'\n'  # no parents
 
1050
                             b'%d\n'  # start offset
 
1051
                             b'%d\n'  # end offset
 
1052
                             b'key4\n'
 
1053
                             b'\n'
 
1054
                             b'%d\n'
 
1055
                             b'%d\n'
966
1056
                             % (entry1[0], entry1[1],
967
1057
                                entry4[0], entry4[1]),
968
 
                            header)
 
1058
                             header)
969
1059
        z_block = rest[z_header_len:]
970
1060
        self.assertEqual(block_bytes, z_block)
971
1061
 
972
1062
    def test_from_bytes(self):
973
1063
        locations, block = self.make_block(self._texts)
974
1064
        manager = groupcompress._LazyGroupContentManager(block)
975
 
        self.add_key_to_manager(('key1',), locations, block, manager)
976
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1065
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1066
        self.add_key_to_manager((b'key4',), locations, block, manager)
977
1067
        wire_bytes = manager._wire_bytes()
978
 
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
 
1068
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
979
1069
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
980
1070
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
981
1071
        self.assertEqual(2, len(manager._factories))
985
1075
            result_order.append(record.key)
986
1076
            text = self._texts[record.key]
987
1077
            self.assertEqual(text, record.get_bytes_as('fulltext'))
988
 
        self.assertEqual([('key1',), ('key4',)], result_order)
 
1078
        self.assertEqual([(b'key1',), (b'key4',)], result_order)
989
1079
 
990
1080
    def test__check_rebuild_no_changes(self):
991
1081
        block, manager = self.make_block_and_full_manager(self._texts)
996
1086
        locations, block = self.make_block(self._texts)
997
1087
        manager = groupcompress._LazyGroupContentManager(block)
998
1088
        # Request just the first key, which should trigger a 'strip' action
999
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1089
        self.add_key_to_manager((b'key1',), locations, block, manager)
1000
1090
        manager._check_rebuild_block()
1001
1091
        self.assertIsNot(block, manager._block)
1002
1092
        self.assertTrue(block._content_length > manager._block._content_length)
1003
1093
        # We should be able to still get the content out of this block, though
1004
1094
        # it should only have 1 entry
1005
1095
        for record in manager.get_record_stream():
1006
 
            self.assertEqual(('key1',), record.key)
 
1096
            self.assertEqual((b'key1',), record.key)
1007
1097
            self.assertEqual(self._texts[record.key],
1008
1098
                             record.get_bytes_as('fulltext'))
1009
1099
 
1011
1101
        locations, block = self.make_block(self._texts)
1012
1102
        manager = groupcompress._LazyGroupContentManager(block)
1013
1103
        # Request a small key in the middle should trigger a 'rebuild'
1014
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1104
        self.add_key_to_manager((b'key4',), locations, block, manager)
1015
1105
        manager._check_rebuild_block()
1016
1106
        self.assertIsNot(block, manager._block)
1017
1107
        self.assertTrue(block._content_length > manager._block._content_length)
1018
1108
        for record in manager.get_record_stream():
1019
 
            self.assertEqual(('key4',), record.key)
 
1109
            self.assertEqual((b'key4',), record.key)
1020
1110
            self.assertEqual(self._texts[record.key],
1021
1111
                             record.get_bytes_as('fulltext'))
1022
1112
 
 
1113
    def test_manager_default_compressor_settings(self):
 
1114
        locations, old_block = self.make_block(self._texts)
 
1115
        manager = groupcompress._LazyGroupContentManager(old_block)
 
1116
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1117
        # It doesn't greedily evaluate the compressor settings
 
1118
        self.assertIs(None, manager._compressor_settings)
 
1119
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
 
1120
                         manager._get_compressor_settings())
 
1121
 
 
1122
    def test_manager_custom_compressor_settings(self):
 
1123
        locations, old_block = self.make_block(self._texts)
 
1124
        called = []
 
1125
 
 
1126
        def compressor_settings():
 
1127
            called.append('called')
 
1128
            return (10,)
 
1129
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1130
                                                         get_compressor_settings=compressor_settings)
 
1131
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1132
        # It doesn't greedily evaluate compressor_settings
 
1133
        self.assertIs(None, manager._compressor_settings)
 
1134
        self.assertEqual((10,), manager._get_compressor_settings())
 
1135
        self.assertEqual((10,), manager._get_compressor_settings())
 
1136
        self.assertEqual((10,), manager._compressor_settings)
 
1137
        # Only called 1 time
 
1138
        self.assertEqual(['called'], called)
 
1139
 
 
1140
    def test__rebuild_handles_compressor_settings(self):
 
1141
        if not isinstance(groupcompress.GroupCompressor,
 
1142
                          groupcompress.PyrexGroupCompressor):
 
1143
            raise tests.TestNotApplicable('pure-python compressor'
 
1144
                                          ' does not handle compressor_settings')
 
1145
        locations, old_block = self.make_block(self._texts)
 
1146
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1147
                                                         get_compressor_settings=lambda: dict(max_bytes_to_index=32))
 
1148
        gc = manager._make_group_compressor()
 
1149
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
 
1150
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
 
1151
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
 
1152
        action, last_byte, total_bytes = manager._check_rebuild_action()
 
1153
        self.assertEqual('rebuild', action)
 
1154
        manager._rebuild_block()
 
1155
        new_block = manager._block
 
1156
        self.assertIsNot(old_block, new_block)
 
1157
        # Because of the new max_bytes_to_index, we do a poor job of
 
1158
        # rebuilding. This is a side-effect of the change, but at least it does
 
1159
        # show the setting had an effect.
 
1160
        self.assertTrue(old_block._content_length < new_block._content_length)
 
1161
 
1023
1162
    def test_check_is_well_utilized_all_keys(self):
1024
1163
        block, manager = self.make_block_and_full_manager(self._texts)
1025
1164
        self.assertFalse(manager.check_is_well_utilized())
1036
1175
 
1037
1176
    def test_check_is_well_utilized_mixed_keys(self):
1038
1177
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
 
1178
        f1k1 = (b'f1', b'k1')
 
1179
        f1k2 = (b'f1', b'k2')
 
1180
        f2k1 = (b'f2', b'k1')
 
1181
        f2k2 = (b'f2', b'k2')
 
1182
        texts[f1k1] = self._texts[(b'key1',)]
 
1183
        texts[f1k2] = self._texts[(b'key2',)]
 
1184
        texts[f2k1] = self._texts[(b'key3',)]
 
1185
        texts[f2k2] = self._texts[(b'key4',)]
1047
1186
        block, manager = self.make_block_and_full_manager(texts)
1048
1187
        self.assertFalse(manager.check_is_well_utilized())
1049
1188
        manager._full_enough_block_size = block._content_length
1057
1196
        locations, block = self.make_block(self._texts)
1058
1197
        manager = groupcompress._LazyGroupContentManager(block)
1059
1198
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
1199
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1200
        self.add_key_to_manager((b'key2',), locations, block, manager)
1062
1201
        # Just using the content from key1 and 2 is not enough to be considered
1063
1202
        # 'complete'
1064
1203
        self.assertFalse(manager.check_is_well_utilized())
1065
1204
        # However if we add key4, then we have enough, as we only require 75%
1066
1205
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1206
        self.add_key_to_manager((b'key4',), locations, block, manager)
1068
1207
        self.assertTrue(manager.check_is_well_utilized())
 
1208
 
 
1209
 
 
1210
class Test_GCBuildDetails(tests.TestCase):
 
1211
 
 
1212
    def test_acts_like_tuple(self):
 
1213
        # _GCBuildDetails inlines some of the data that used to be spread out
 
1214
        # across a bunch of tuples
 
1215
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1216
                                           ('INDEX', 10, 20, 0, 5))
 
1217
        self.assertEqual(4, len(bd))
 
1218
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
 
1219
        self.assertEqual(None, bd[1])  # Compression Parent is always None
 
1220
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
 
1221
        self.assertEqual(('group', None), bd[3])  # Record details
 
1222
 
 
1223
    def test__repr__(self):
 
1224
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1225
                                           ('INDEX', 10, 20, 0, 5))
 
1226
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
 
1227
                         " (('parent1',), ('parent2',)))",
 
1228
                         repr(bd))
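
The tuple shape these assertions describe, written out as the plain tuple
that _GCBuildDetails stands in for (values copied from the test above; the
variable name is illustrative only):

    equivalent_tuple = (
        ('INDEX', 10, 20, 0, 5),         # index memo, bd[0]
        None,                            # compression parent, always None, bd[1]
        (('parent1',), ('parent2',)),    # parent keys, bd[2]
        ('group', None),                 # record details, bd[3]
        )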