/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/tests/test_groupcompress.py

  • Committer: Jelmer Vernooij
  • Date: 2018-07-08 14:45:27 UTC
  • mto: This revision was merged to the branch mainline in revision 7036.
  • Revision ID: jelmer@jelmer.uk-20180708144527-codhlvdcdg9y0nji
Fix a bunch of merge tests.
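
The diff below is part of the Python 3 port of this test module: imports move from the flat bzrlib namespace to relative breezy imports (from .. import ..., from ..bzr import ...), raw record content switches from text literals to bytes literals, iteritems() and xrange() give way to items() and range(), and the hand-rolled load_tests() hook is replaced by scenario-based parameterization via load_tests_apply_scenarios. The following sketch is illustration only (it is not part of the change, and the values are made up); it shows the three mechanical rewrites that account for most of the hunks:

    # Illustration only: the recurring Python 2 -> Python 3 rewrites applied below.
    # 1. Raw record content is bytes, so text literals become bytes literals.
    old_style = 'strange\ncommon\n'      # Python 2: str doubled as a byte string
    new_style = b'strange\ncommon\n'     # Python 3: record content must be bytes
    assert old_style.encode('ascii') == new_style

    # 2. dict.iteritems() no longer exists; items() is used instead.
    labels_deltas = {(b'label',): (0, 0, 15, 15)}   # made-up example data
    locs = {key: (start, end)
            for key, (start, _, end, _) in labels_deltas.items()}
    assert locs == {(b'label',): (0, 15)}

    # 3. xrange() is gone; range() is the lazy equivalent in Python 3.
    assert sum(range(2048)) == 2048 * 2047 // 2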

1
 
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
 
1
# Copyright (C) 2008-2011 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
18
18
 
19
19
import zlib
20
20
 
21
 
from bzrlib import (
22
 
    btree_index,
23
 
    groupcompress,
 
21
from .. import (
 
22
    config,
24
23
    errors,
25
 
    index as _mod_index,
26
24
    osutils,
27
25
    tests,
28
26
    trace,
 
27
    )
 
28
from ..bzr import (
 
29
    btree_index,
 
30
    groupcompress,
 
31
    knit,
 
32
    index as _mod_index,
29
33
    versionedfile,
30
34
    )
31
 
from bzrlib.osutils import sha_string
32
 
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
33
 
 
34
 
 
35
 
def load_tests(standard_tests, module, loader):
36
 
    """Parameterize tests for all versions of groupcompress."""
37
 
    to_adapt, result = tests.split_suite_by_condition(
38
 
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
 
35
from ..osutils import sha_string
 
36
from .test__groupcompress import compiled_groupcompress_feature
 
37
from .scenarios import load_tests_apply_scenarios
 
38
 
 
39
 
 
40
def group_compress_implementation_scenarios():
39
41
    scenarios = [
40
42
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
41
43
        ]
42
44
    if compiled_groupcompress_feature.available():
43
45
        scenarios.append(('C',
44
46
            {'compressor': groupcompress.PyrexGroupCompressor}))
45
 
    return tests.multiply_tests(to_adapt, scenarios, result)
 
47
    return scenarios
 
48
 
 
49
 
 
50
load_tests = load_tests_apply_scenarios
46
51
 
47
52
 
48
53
class TestGroupCompressor(tests.TestCase):
49
54
 
50
55
    def _chunks_to_repr_lines(self, chunks):
51
 
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
 
56
        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))
52
57
 
53
58
    def assertEqualDiffEncoded(self, expected, actual):
54
59
        """Compare the actual content to the expected content.
66
71
class TestAllGroupCompressors(TestGroupCompressor):
67
72
    """Tests for GroupCompressor"""
68
73
 
69
 
    compressor = None # Set by multiply_tests
 
74
    scenarios = group_compress_implementation_scenarios()
 
75
    compressor = None # Set by scenario
70
76
 
71
77
    def test_empty_delta(self):
72
78
        compressor = self.compressor()
76
82
        # diff against NUKK
77
83
        compressor = self.compressor()
78
84
        sha1, start_point, end_point, _ = compressor.compress(('label',),
79
 
            'strange\ncommon\n', None)
80
 
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
81
 
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
82
 
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
 
85
            b'strange\ncommon\n', None)
 
86
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
 
87
        expected_lines = b'f\x0fstrange\ncommon\n'
 
88
        self.assertEqual(expected_lines, b''.join(compressor.chunks))
83
89
        self.assertEqual(0, start_point)
84
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
90
        self.assertEqual(len(expected_lines), end_point)
85
91
 
86
92
    def test_empty_content(self):
87
93
        compressor = self.compressor()
88
94
        # Adding empty bytes should return the 'null' record
89
95
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
90
 
                                                                 '', None)
 
96
                                                                 b'', None)
91
97
        self.assertEqual(0, start_point)
92
98
        self.assertEqual(0, end_point)
93
99
        self.assertEqual('fulltext', kind)
95
101
        self.assertEqual(0, compressor.endpoint)
96
102
        self.assertEqual([], compressor.chunks)
97
103
        # Even after adding some content
98
 
        compressor.compress(('content',), 'some\nbytes\n', None)
 
104
        compressor.compress(('content',), b'some\nbytes\n', None)
99
105
        self.assertTrue(compressor.endpoint > 0)
100
106
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
101
 
                                                                 '', None)
 
107
                                                                 b'', None)
102
108
        self.assertEqual(0, start_point)
103
109
        self.assertEqual(0, end_point)
104
110
        self.assertEqual('fulltext', kind)
109
115
        # reading something that is in the compressor stream already.
110
116
        compressor = self.compressor()
111
117
        sha1_1, _, _, _ = compressor.compress(('label',),
112
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
118
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
113
119
        expected_lines = list(compressor.chunks)
114
120
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
115
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
121
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
116
122
        # get the first out
117
 
        self.assertEqual(('strange\ncommon long line\n'
118
 
                          'that needs a 16 byte match\n', sha1_1),
 
123
        self.assertEqual((b'strange\ncommon long line\n'
 
124
                          b'that needs a 16 byte match\n', sha1_1),
119
125
                         compressor.extract(('label',)))
120
126
        # and the second
121
 
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
122
 
                          'different\n', sha1_2),
 
127
        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
 
128
                          b'different\n', sha1_2),
123
129
                         compressor.extract(('newlabel',)))
124
130
 
125
131
    def test_pop_last(self):
126
132
        compressor = self.compressor()
127
133
        _, _, _, _ = compressor.compress(('key1',),
128
 
            'some text\nfor the first entry\n', None)
 
134
            b'some text\nfor the first entry\n', None)
129
135
        expected_lines = list(compressor.chunks)
130
136
        _, _, _, _ = compressor.compress(('key2',),
131
 
            'some text\nfor the second entry\n', None)
 
137
            b'some text\nfor the second entry\n', None)
132
138
        compressor.pop_last()
133
139
        self.assertEqual(expected_lines, compressor.chunks)
134
140
 
141
147
    def test_stats(self):
142
148
        compressor = self.compressor()
143
149
        compressor.compress(('label',),
144
 
                            'strange\n'
145
 
                            'common very very long line\n'
146
 
                            'plus more text\n', None)
 
150
                            b'strange\n'
 
151
                            b'common very very long line\n'
 
152
                            b'plus more text\n', None)
147
153
        compressor.compress(('newlabel',),
148
 
                            'common very very long line\n'
149
 
                            'plus more text\n'
150
 
                            'different\n'
151
 
                            'moredifferent\n', None)
 
154
                            b'common very very long line\n'
 
155
                            b'plus more text\n'
 
156
                            b'different\n'
 
157
                            b'moredifferent\n', None)
152
158
        compressor.compress(('label3',),
153
 
                            'new\n'
154
 
                            'common very very long line\n'
155
 
                            'plus more text\n'
156
 
                            'different\n'
157
 
                            'moredifferent\n', None)
 
159
                            b'new\n'
 
160
                            b'common very very long line\n'
 
161
                            b'plus more text\n'
 
162
                            b'different\n'
 
163
                            b'moredifferent\n', None)
158
164
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
159
165
 
160
166
    def test_two_nosha_delta(self):
161
167
        compressor = self.compressor()
162
168
        sha1_1, _, _, _ = compressor.compress(('label',),
163
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
169
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
164
170
        expected_lines = list(compressor.chunks)
165
171
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
166
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
167
 
        self.assertEqual(sha_string('common long line\n'
168
 
                                    'that needs a 16 byte match\n'
169
 
                                    'different\n'), sha1_2)
 
172
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
173
        self.assertEqual(sha_string(b'common long line\n'
 
174
                                    b'that needs a 16 byte match\n'
 
175
                                    b'different\n'), sha1_2)
170
176
        expected_lines.extend([
171
177
            # 'delta', delta length
172
 
            'd\x0f',
 
178
            b'd\x0f',
173
179
            # source and target length
174
 
            '\x36',
 
180
            b'\x36',
175
181
            # copy the line common
176
 
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
 
182
            b'\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
177
183
            # add the line different, and the trailing newline
178
 
            '\x0adifferent\n', # insert 10 bytes
 
184
            b'\x0adifferent\n', # insert 10 bytes
179
185
            ])
180
186
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
181
187
        self.assertEqual(sum(map(len, expected_lines)), end_point)
185
191
        # both parents.
186
192
        compressor = self.compressor()
187
193
        sha1_1, _, _, _ = compressor.compress(('label',),
188
 
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
194
            b'strange\ncommon very very long line\nwith some extra text\n', None)
189
195
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
190
 
            'different\nmoredifferent\nand then some more\n', None)
 
196
            b'different\nmoredifferent\nand then some more\n', None)
191
197
        expected_lines = list(compressor.chunks)
192
198
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
193
 
            'new\ncommon very very long line\nwith some extra text\n'
194
 
            'different\nmoredifferent\nand then some more\n',
 
199
            b'new\ncommon very very long line\nwith some extra text\n'
 
200
            b'different\nmoredifferent\nand then some more\n',
195
201
            None)
196
202
        self.assertEqual(
197
 
            sha_string('new\ncommon very very long line\nwith some extra text\n'
198
 
                       'different\nmoredifferent\nand then some more\n'),
 
203
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
 
204
                       b'different\nmoredifferent\nand then some more\n'),
199
205
            sha1_3)
200
206
        expected_lines.extend([
201
207
            # 'delta', delta length
202
 
            'd\x0b',
 
208
            b'd\x0b',
203
209
            # source and target length
204
 
            '\x5f'
 
210
            b'\x5f'
205
211
            # insert new
206
 
            '\x03new',
 
212
            b'\x03new',
207
213
            # Copy of first parent 'common' range
208
 
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
 
214
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
209
215
            # Copy of second parent 'different' range
210
 
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
 
216
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
211
217
            ])
212
218
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
213
219
        self.assertEqual(sum(map(len, expected_lines)), end_point)
220
226
    def test_stats(self):
221
227
        compressor = self.compressor()
222
228
        compressor.compress(('label',),
223
 
                            'strange\n'
224
 
                            'common very very long line\n'
225
 
                            'plus more text\n', None)
 
229
                            b'strange\n'
 
230
                            b'common very very long line\n'
 
231
                            b'plus more text\n', None)
226
232
        compressor.compress(('newlabel',),
227
 
                            'common very very long line\n'
228
 
                            'plus more text\n'
229
 
                            'different\n'
230
 
                            'moredifferent\n', None)
 
233
                            b'common very very long line\n'
 
234
                            b'plus more text\n'
 
235
                            b'different\n'
 
236
                            b'moredifferent\n', None)
231
237
        compressor.compress(('label3',),
232
 
                            'new\n'
233
 
                            'common very very long line\n'
234
 
                            'plus more text\n'
235
 
                            'different\n'
236
 
                            'moredifferent\n', None)
 
238
                            b'new\n'
 
239
                            b'common very very long line\n'
 
240
                            b'plus more text\n'
 
241
                            b'different\n'
 
242
                            b'moredifferent\n', None)
237
243
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
238
244
 
239
245
    def test_two_nosha_delta(self):
240
246
        compressor = self.compressor()
241
247
        sha1_1, _, _, _ = compressor.compress(('label',),
242
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
248
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
243
249
        expected_lines = list(compressor.chunks)
244
250
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
245
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
246
 
        self.assertEqual(sha_string('common long line\n'
247
 
                                    'that needs a 16 byte match\n'
248
 
                                    'different\n'), sha1_2)
 
251
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
252
        self.assertEqual(sha_string(b'common long line\n'
 
253
                                    b'that needs a 16 byte match\n'
 
254
                                    b'different\n'), sha1_2)
249
255
        expected_lines.extend([
250
256
            # 'delta', delta length
251
 
            'd\x0f',
 
257
            b'd\x0f',
252
258
            # target length
253
 
            '\x36',
 
259
            b'\x36',
254
260
            # copy the line common
255
 
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
 
261
            b'\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
256
262
            # add the line different, and the trailing newline
257
 
            '\x0adifferent\n', # insert 10 bytes
 
263
            b'\x0adifferent\n', # insert 10 bytes
258
264
            ])
259
265
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
260
266
        self.assertEqual(sum(map(len, expected_lines)), end_point)
264
270
        # both parents.
265
271
        compressor = self.compressor()
266
272
        sha1_1, _, _, _ = compressor.compress(('label',),
267
 
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
273
            b'strange\ncommon very very long line\nwith some extra text\n', None)
268
274
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
269
 
            'different\nmoredifferent\nand then some more\n', None)
 
275
            b'different\nmoredifferent\nand then some more\n', None)
270
276
        expected_lines = list(compressor.chunks)
271
277
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
272
 
            'new\ncommon very very long line\nwith some extra text\n'
273
 
            'different\nmoredifferent\nand then some more\n',
 
278
            b'new\ncommon very very long line\nwith some extra text\n'
 
279
            b'different\nmoredifferent\nand then some more\n',
274
280
            None)
275
281
        self.assertEqual(
276
 
            sha_string('new\ncommon very very long line\nwith some extra text\n'
277
 
                       'different\nmoredifferent\nand then some more\n'),
 
282
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
 
283
                       b'different\nmoredifferent\nand then some more\n'),
278
284
            sha1_3)
279
285
        expected_lines.extend([
280
286
            # 'delta', delta length
281
 
            'd\x0c',
 
287
            b'd\x0c',
282
288
            # target length
283
 
            '\x5f'
 
289
            b'\x5f'
284
290
            # insert new
285
 
            '\x04new\n',
 
291
            b'\x04new\n',
286
292
            # Copy of first parent 'common' range
287
 
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
 
293
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
288
294
            # Copy of second parent 'different' range
289
 
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
 
295
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
290
296
            ])
291
297
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
292
298
        self.assertEqual(sum(map(len, expected_lines)), end_point)
301
307
        for key in sorted(key_to_text):
302
308
            compressor.compress(key, key_to_text[key], None)
303
309
        locs = dict((key, (start, end)) for key, (start, _, end, _)
304
 
                    in compressor.labels_deltas.iteritems())
 
310
                    in compressor.labels_deltas.items())
305
311
        block = compressor.flush()
306
312
        raw_bytes = block.to_bytes()
307
313
        # Go through from_bytes(to_bytes()) so that we start with a compressed
310
316
 
311
317
    def test_from_empty_bytes(self):
312
318
        self.assertRaises(ValueError,
313
 
                          groupcompress.GroupCompressBlock.from_bytes, '')
 
319
                          groupcompress.GroupCompressBlock.from_bytes, b'')
314
320
 
315
321
    def test_from_minimal_bytes(self):
316
322
        block = groupcompress.GroupCompressBlock.from_bytes(
317
 
            'gcb1z\n0\n0\n')
 
323
            b'gcb1z\n0\n0\n')
318
324
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
319
325
        self.assertIs(None, block._content)
320
 
        self.assertEqual('', block._z_content)
 
326
        self.assertEqual(b'', block._z_content)
321
327
        block._ensure_content()
322
 
        self.assertEqual('', block._content)
323
 
        self.assertEqual('', block._z_content)
 
328
        self.assertEqual(b'', block._content)
 
329
        self.assertEqual(b'', block._z_content)
324
330
        block._ensure_content() # Ensure content is safe to call 2x
325
331
 
326
332
    def test_from_invalid(self):
327
333
        self.assertRaises(ValueError,
328
334
                          groupcompress.GroupCompressBlock.from_bytes,
329
 
                          'this is not a valid header')
 
335
                          b'this is not a valid header')
330
336
 
331
337
    def test_from_bytes(self):
332
 
        content = ('a tiny bit of content\n')
 
338
        content = (b'a tiny bit of content\n')
333
339
        z_content = zlib.compress(content)
334
340
        z_bytes = (
335
 
            'gcb1z\n' # group compress block v1 plain
336
 
            '%d\n' # Length of compressed content
337
 
            '%d\n' # Length of uncompressed content
338
 
            '%s'   # Compressed content
 
341
            b'gcb1z\n' # group compress block v1 plain
 
342
            b'%d\n' # Length of compressed content
 
343
            b'%d\n' # Length of uncompressed content
 
344
            b'%s'   # Compressed content
339
345
            ) % (len(z_content), len(content), z_content)
340
346
        block = groupcompress.GroupCompressBlock.from_bytes(
341
347
            z_bytes)
347
353
        self.assertEqual(z_content, block._z_content)
348
354
        self.assertEqual(content, block._content)
349
355
 
 
356
    def test_to_chunks(self):
 
357
        content_chunks = [b'this is some content\n',
 
358
                          b'this content will be compressed\n']
 
359
        content_len = sum(map(len, content_chunks))
 
360
        content = b''.join(content_chunks)
 
361
        gcb = groupcompress.GroupCompressBlock()
 
362
        gcb.set_chunked_content(content_chunks, content_len)
 
363
        total_len, block_chunks = gcb.to_chunks()
 
364
        block_bytes = b''.join(block_chunks)
 
365
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
 
366
        self.assertEqual(total_len, len(block_bytes))
 
367
        self.assertEqual(gcb._content_length, content_len)
 
368
        expected_header =(b'gcb1z\n' # group compress block v1 zlib
 
369
                          b'%d\n' # Length of compressed content
 
370
                          b'%d\n' # Length of uncompressed content
 
371
                         ) % (gcb._z_content_length, gcb._content_length)
 
372
        # The first chunk should be the header chunk. It is small, fixed size,
 
373
        # and there is no compelling reason to split it up
 
374
        self.assertEqual(expected_header, block_chunks[0])
 
375
        self.assertStartsWith(block_bytes, expected_header)
 
376
        remaining_bytes = block_bytes[len(expected_header):]
 
377
        raw_bytes = zlib.decompress(remaining_bytes)
 
378
        self.assertEqual(content, raw_bytes)
 
379
 
350
380
    def test_to_bytes(self):
351
 
        content = ('this is some content\n'
352
 
                   'this content will be compressed\n')
 
381
        content = (b'this is some content\n'
 
382
                   b'this content will be compressed\n')
353
383
        gcb = groupcompress.GroupCompressBlock()
354
384
        gcb.set_content(content)
355
 
        bytes = gcb.to_bytes()
 
385
        data = gcb.to_bytes()
356
386
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
357
387
        self.assertEqual(gcb._content_length, len(content))
358
 
        expected_header =('gcb1z\n' # group compress block v1 zlib
359
 
                          '%d\n' # Length of compressed content
360
 
                          '%d\n' # Length of uncompressed content
 
388
        expected_header =(b'gcb1z\n' # group compress block v1 zlib
 
389
                          b'%d\n' # Length of compressed content
 
390
                          b'%d\n' # Length of uncompressed content
361
391
                         ) % (gcb._z_content_length, gcb._content_length)
362
 
        self.assertStartsWith(bytes, expected_header)
363
 
        remaining_bytes = bytes[len(expected_header):]
 
392
        self.assertStartsWith(data, expected_header)
 
393
        remaining_bytes = data[len(expected_header):]
364
394
        raw_bytes = zlib.decompress(remaining_bytes)
365
395
        self.assertEqual(content, raw_bytes)
366
396
 
367
397
        # we should get the same results if using the chunked version
368
398
        gcb = groupcompress.GroupCompressBlock()
369
 
        gcb.set_chunked_content(['this is some content\n'
370
 
                                 'this content will be compressed\n'],
 
399
        gcb.set_chunked_content([b'this is some content\n'
 
400
                                 b'this content will be compressed\n'],
371
401
                                 len(content))
372
 
        old_bytes = bytes
373
 
        bytes = gcb.to_bytes()
374
 
        self.assertEqual(old_bytes, bytes)
 
402
        old_data = data
 
403
        data = gcb.to_bytes()
 
404
        self.assertEqual(old_data, data)
375
405
 
376
406
    def test_partial_decomp(self):
377
407
        content_chunks = []
379
409
        # partial decompression to work with. Most auto-generated data
380
410
        # compresses a bit too well, we want a combination, so we combine a sha
381
411
        # hash with compressible data.
382
 
        for i in xrange(2048):
383
 
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
 
412
        for i in range(2048):
 
413
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
384
414
            content_chunks.append(next_content)
385
415
            next_sha1 = osutils.sha_string(next_content)
386
 
            content_chunks.append(next_sha1 + '\n')
387
 
        content = ''.join(content_chunks)
 
416
            content_chunks.append(next_sha1 + b'\n')
 
417
        content = b''.join(content_chunks)
388
418
        self.assertEqual(158634, len(content))
389
419
        z_content = zlib.compress(content)
390
420
        self.assertEqual(57182, len(z_content))
391
421
        block = groupcompress.GroupCompressBlock()
392
 
        block._z_content = z_content
 
422
        block._z_content_chunks = (z_content,)
393
423
        block._z_content_length = len(z_content)
394
424
        block._compressor_name = 'zlib'
395
425
        block._content_length = 158634
424
454
        # partial decompression to work with. Most auto-generated data
425
455
        # compresses a bit too well, we want a combination, so we combine a sha
426
456
        # hash with compressible data.
427
 
        for i in xrange(2048):
428
 
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
 
457
        for i in range(2048):
 
458
            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
429
459
            content_chunks.append(next_content)
430
460
            next_sha1 = osutils.sha_string(next_content)
431
 
            content_chunks.append(next_sha1 + '\n')
432
 
        content = ''.join(content_chunks)
 
461
            content_chunks.append(next_sha1 + b'\n')
 
462
        content = b''.join(content_chunks)
433
463
        self.assertEqual(158634, len(content))
434
464
        z_content = zlib.compress(content)
435
465
        self.assertEqual(57182, len(z_content))
436
466
        block = groupcompress.GroupCompressBlock()
437
 
        block._z_content = z_content
 
467
        block._z_content_chunks = (z_content,)
438
468
        block._z_content_length = len(z_content)
439
469
        block._compressor_name = 'zlib'
440
470
        block._content_length = 158634
447
477
        self.assertIs(None, block._z_content_decompressor)
448
478
 
449
479
    def test__dump(self):
450
 
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
451
 
        key_to_text = {('1',): dup_content + '1 unique\n',
452
 
                       ('2',): dup_content + '2 extra special\n'}
 
480
        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
 
481
        key_to_text = {(b'1',): dup_content + b'1 unique\n',
 
482
                       (b'2',): dup_content + b'2 extra special\n'}
453
483
        locs, block = self.make_block(key_to_text)
454
 
        self.assertEqual([('f', len(key_to_text[('1',)])),
455
 
                          ('d', 21, len(key_to_text[('2',)]),
456
 
                           [('c', 2, len(dup_content)),
457
 
                            ('i', len('2 extra special\n'), '')
 
484
        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
 
485
                          (b'd', 21, len(key_to_text[(b'2',)]),
 
486
                           [(b'c', 2, len(dup_content)),
 
487
                            (b'i', len(b'2 extra special\n'), b'')
458
488
                           ]),
459
489
                         ], block._dump())
460
490
 
486
516
        return btree_index.BTreeGraphIndex(trans, name, size)
487
517
 
488
518
    def make_g_index_missing_parent(self):
489
 
        graph_index = self.make_g_index('missing_parent', 1,
490
 
            [(('parent', ), '2 78 2 10', ([],)),
491
 
             (('tip', ), '2 78 2 10',
492
 
              ([('parent', ), ('missing-parent', )],)),
 
519
        graph_index = self.make_g_index(b'missing_parent', 1,
 
520
            [((b'parent', ), b'2 78 2 10', ([],)),
 
521
             ((b'tip', ), b'2 78 2 10',
 
522
              ([(b'parent', ), (b'missing-parent', )],)),
493
523
              ])
494
524
        return graph_index
495
525
 
497
527
        # Consider promoting 'as-requested' to general availability, and
498
528
        # make this a VF interface test
499
529
        vf = self.make_test_vf(False, dir='source')
500
 
        vf.add_lines(('a',), (), ['lines\n'])
501
 
        vf.add_lines(('b',), (), ['lines\n'])
502
 
        vf.add_lines(('c',), (), ['lines\n'])
503
 
        vf.add_lines(('d',), (), ['lines\n'])
 
530
        vf.add_lines((b'a',), (), [b'lines\n'])
 
531
        vf.add_lines((b'b',), (), [b'lines\n'])
 
532
        vf.add_lines((b'c',), (), [b'lines\n'])
 
533
        vf.add_lines((b'd',), (), [b'lines\n'])
504
534
        vf.writer.end()
505
535
        keys = [record.key for record in vf.get_record_stream(
506
 
                    [('a',), ('b',), ('c',), ('d',)],
 
536
                    [(b'a',), (b'b',), (b'c',), (b'd',)],
507
537
                    'as-requested', False)]
508
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
 
538
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
509
539
        keys = [record.key for record in vf.get_record_stream(
510
 
                    [('b',), ('a',), ('d',), ('c',)],
 
540
                    [(b'b',), (b'a',), (b'd',), (b'c',)],
511
541
                    'as-requested', False)]
512
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
542
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
513
543
 
514
544
        # It should work even after being repacked into another VF
515
545
        vf2 = self.make_test_vf(False, dir='target')
516
546
        vf2.insert_record_stream(vf.get_record_stream(
517
 
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
 
547
                    [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
518
548
        vf2.writer.end()
519
549
 
520
550
        keys = [record.key for record in vf2.get_record_stream(
521
 
                    [('a',), ('b',), ('c',), ('d',)],
 
551
                    [(b'a',), (b'b',), (b'c',), (b'd',)],
522
552
                    'as-requested', False)]
523
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
 
553
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
524
554
        keys = [record.key for record in vf2.get_record_stream(
525
 
                    [('b',), ('a',), ('d',), ('c',)],
 
555
                    [(b'b',), (b'a',), (b'd',), (b'c',)],
526
556
                    'as-requested', False)]
527
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
557
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
558
 
 
559
    def test_get_record_stream_max_bytes_to_index_default(self):
 
560
        vf = self.make_test_vf(True, dir='source')
 
561
        vf.add_lines((b'a',), (), [b'lines\n'])
 
562
        vf.writer.end()
 
563
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
 
564
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
 
565
                         record._manager._get_compressor_settings())
 
566
 
 
567
    def test_get_record_stream_accesses_compressor_settings(self):
 
568
        vf = self.make_test_vf(True, dir='source')
 
569
        vf.add_lines((b'a',), (), [b'lines\n'])
 
570
        vf.writer.end()
 
571
        vf._max_bytes_to_index = 1234
 
572
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
 
573
        self.assertEqual(dict(max_bytes_to_index=1234),
 
574
                         record._manager._get_compressor_settings())
 
575
 
 
576
    @staticmethod
 
577
    def grouped_stream(revision_ids, first_parents=()):
 
578
        parents = first_parents
 
579
        for revision_id in revision_ids:
 
580
            key = (revision_id,)
 
581
            record = versionedfile.FulltextContentFactory(
 
582
                key, parents, None,
 
583
                b'some content that is\n'
 
584
                b'identical except for\n'
 
585
                b'revision_id:%s\n' % (revision_id,))
 
586
            yield record
 
587
            parents = (key,)
528
588
 
529
589
    def test_insert_record_stream_reuses_blocks(self):
530
590
        vf = self.make_test_vf(True, dir='source')
531
 
        def grouped_stream(revision_ids, first_parents=()):
532
 
            parents = first_parents
533
 
            for revision_id in revision_ids:
534
 
                key = (revision_id,)
535
 
                record = versionedfile.FulltextContentFactory(
536
 
                    key, parents, None,
537
 
                    'some content that is\n'
538
 
                    'identical except for\n'
539
 
                    'revision_id:%s\n' % (revision_id,))
540
 
                yield record
541
 
                parents = (key,)
542
591
        # One group, a-d
543
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
 
592
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
544
593
        # Second group, e-h
545
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
546
 
                                               first_parents=(('d',),)))
 
594
        vf.insert_record_stream(self.grouped_stream(
 
595
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
547
596
        block_bytes = {}
548
 
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
549
 
                                      'unordered', False)
 
597
        stream = vf.get_record_stream(
 
598
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
550
599
        num_records = 0
551
600
        for record in stream:
552
 
            if record.key in [('a',), ('e',)]:
 
601
            if record.key in [(b'a',), (b'e',)]:
553
602
                self.assertEqual('groupcompress-block', record.storage_kind)
554
603
            else:
555
604
                self.assertEqual('groupcompress-block-ref',
558
607
            num_records += 1
559
608
        self.assertEqual(8, num_records)
560
609
        for r in 'abcd':
561
 
            key = (r,)
562
 
            self.assertIs(block_bytes[key], block_bytes[('a',)])
563
 
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
 
610
            key = (r.encode(),)
 
611
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
 
612
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
564
613
        for r in 'efgh':
565
 
            key = (r,)
566
 
            self.assertIs(block_bytes[key], block_bytes[('e',)])
567
 
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
 
614
            key = (r.encode(),)
 
615
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
 
616
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
568
617
        # Now copy the blocks into another vf, and ensure that the blocks are
569
618
        # preserved without creating new entries
570
619
        vf2 = self.make_test_vf(True, dir='target')
 
620
        keys = [(r.encode(),) for r in 'abcdefgh']
571
621
        # ordering in 'groupcompress' order, should actually swap the groups in
572
622
        # the target vf, but the groups themselves should not be disturbed.
573
623
        def small_size_stream():
574
 
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
 
                                               'groupcompress', False):
 
624
            for record in vf.get_record_stream(keys, 'groupcompress', False):
576
625
                record._manager._full_enough_block_size = \
577
626
                    record._manager._block._content_length
578
627
                yield record
579
 
                        
 
628
 
580
629
        vf2.insert_record_stream(small_size_stream())
581
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
582
 
                                       'groupcompress', False)
 
630
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
583
631
        vf2.writer.end()
584
632
        num_records = 0
585
633
        for record in stream:
590
638
 
591
639
    def test_insert_record_stream_packs_on_the_fly(self):
592
640
        vf = self.make_test_vf(True, dir='source')
593
 
        def grouped_stream(revision_ids, first_parents=()):
594
 
            parents = first_parents
595
 
            for revision_id in revision_ids:
596
 
                key = (revision_id,)
597
 
                record = versionedfile.FulltextContentFactory(
598
 
                    key, parents, None,
599
 
                    'some content that is\n'
600
 
                    'identical except for\n'
601
 
                    'revision_id:%s\n' % (revision_id,))
602
 
                yield record
603
 
                parents = (key,)
604
641
        # One group, a-d
605
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
 
642
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
606
643
        # Second group, e-h
607
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
 
                                               first_parents=(('d',),)))
 
644
        vf.insert_record_stream(self.grouped_stream(
 
645
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
609
646
        # Now copy the blocks into another vf, and see that the
610
647
        # insert_record_stream rebuilt a new block on-the-fly because of
611
648
        # under-utilization
612
649
        vf2 = self.make_test_vf(True, dir='target')
 
650
        keys = [(r.encode(),) for r in 'abcdefgh']
613
651
        vf2.insert_record_stream(vf.get_record_stream(
614
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
 
                                       'groupcompress', False)
 
652
            keys, 'groupcompress', False))
 
653
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
617
654
        vf2.writer.end()
618
655
        num_records = 0
619
656
        # All of the records should be recombined into a single block
628
665
 
629
666
    def test__insert_record_stream_no_reuse_block(self):
630
667
        vf = self.make_test_vf(True, dir='source')
631
 
        def grouped_stream(revision_ids, first_parents=()):
632
 
            parents = first_parents
633
 
            for revision_id in revision_ids:
634
 
                key = (revision_id,)
635
 
                record = versionedfile.FulltextContentFactory(
636
 
                    key, parents, None,
637
 
                    'some content that is\n'
638
 
                    'identical except for\n'
639
 
                    'revision_id:%s\n' % (revision_id,))
640
 
                yield record
641
 
                parents = (key,)
642
668
        # One group, a-d
643
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
 
669
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
644
670
        # Second group, e-h
645
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
646
 
                                               first_parents=(('d',),)))
 
671
        vf.insert_record_stream(self.grouped_stream(
 
672
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
647
673
        vf.writer.end()
648
 
        self.assertEqual(8, len(list(vf.get_record_stream(
649
 
                                        [(r,) for r in 'abcdefgh'],
650
 
                                        'unordered', False))))
 
674
        keys = [(r.encode(),) for r in 'abcdefgh']
 
675
        self.assertEqual(8, len(list(
 
676
            vf.get_record_stream(keys, 'unordered', False))))
651
677
        # Now copy the blocks into another vf, and ensure that the blocks are
652
678
        # preserved without creating new entries
653
679
        vf2 = self.make_test_vf(True, dir='target')
654
680
        # ordering in 'groupcompress' order, should actually swap the groups in
655
681
        # the target vf, but the groups themselves should not be disturbed.
656
682
        list(vf2._insert_record_stream(vf.get_record_stream(
657
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
 
683
            keys, 'groupcompress', False),
658
684
            reuse_blocks=False))
659
685
        vf2.writer.end()
660
686
        # After inserting with reuse_blocks=False, we should have everything in
661
687
        # a single new block.
662
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
663
 
                                       'groupcompress', False)
 
688
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
664
689
        block = None
665
690
        for record in stream:
666
691
            if block is None:
676
701
            track_external_parent_refs=True)
677
702
        index.scan_unvalidated_index(unvalidated)
678
703
        self.assertEqual(
679
 
            frozenset([('missing-parent',)]), index.get_missing_parents())
 
704
            frozenset([(b'missing-parent',)]), index.get_missing_parents())
680
705
 
681
706
    def test_track_external_parent_refs(self):
682
707
        g_index = self.make_g_index('empty', 1, [])
687
712
            add_callback=mod_index.add_nodes,
688
713
            track_external_parent_refs=True)
689
714
        index.add_records([
690
 
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
 
715
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
691
716
        self.assertEqual(
692
 
            frozenset([('parent-1',), ('parent-2',)]),
 
717
            frozenset([(b'parent-1',), (b'parent-2',)]),
693
718
            index.get_missing_parents())
694
719
 
695
720
    def make_source_with_b(self, a_parent, path):
696
721
        source = self.make_test_vf(True, dir=path)
697
 
        source.add_lines(('a',), (), ['lines\n'])
 
722
        source.add_lines((b'a',), (), [b'lines\n'])
698
723
        if a_parent:
699
 
            b_parents = (('a',),)
 
724
            b_parents = ((b'a',),)
700
725
        else:
701
726
            b_parents = ()
702
 
        source.add_lines(('b',), b_parents, ['lines\n'])
 
727
        source.add_lines((b'b',), b_parents, [b'lines\n'])
703
728
        return source
704
729
 
705
730
    def do_inconsistent_inserts(self, inconsistency_fatal):
708
733
        for x in range(2):
709
734
            source = self.make_source_with_b(x==1, 'source%s' % x)
710
735
            target.insert_record_stream(source.get_record_stream(
711
 
                [('b',)], 'unordered', False))
 
736
                [(b'b',)], 'unordered', False))
712
737
 
713
738
    def test_inconsistent_redundant_inserts_warn(self):
714
739
        """Should not insert a record that is already present."""
721
746
            self.do_inconsistent_inserts(inconsistency_fatal=False)
722
747
        finally:
723
748
            trace.warning = _trace_warning
724
 
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
725
 
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
726
 
                         warnings)
 
749
        self.assertContainsRe(
 
750
            "\n".join(warnings),
 
751
            r"^inconsistent details in skipped record: \(b?'b',\)"
 
752
            r" \(b?'42 32 0 8', \(\(\),\)\)"
 
753
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
727
754
 
728
755
    def test_inconsistent_redundant_inserts_raises(self):
729
 
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
 
756
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
730
757
                              inconsistency_fatal=True)
731
 
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
732
 
                              " in add_records:"
733
 
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
 
                              " 0 8', \(\(\('a',\),\),\)\)")
 
758
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
 
759
                              r" in add_records:"
 
760
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
 
761
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
735
762
 
736
763
    def test_clear_cache(self):
737
764
        vf = self.make_source_with_b(True, 'source')
738
765
        vf.writer.end()
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
 
766
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
740
767
                                           True):
741
768
            pass
742
769
        self.assertTrue(len(vf._group_cache) > 0)
744
771
        self.assertEqual(0, len(vf._group_cache))
745
772
 
746
773
 
 
774
class TestGroupCompressConfig(tests.TestCaseWithTransport):
 
775
 
 
776
    def make_test_vf(self):
 
777
        t = self.get_transport('.')
 
778
        t.ensure_base()
 
779
        factory = groupcompress.make_pack_factory(graph=True,
 
780
            delta=False, keylength=1, inconsistency_fatal=True)
 
781
        vf = factory(t)
 
782
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
 
783
        return vf
 
784
 
 
785
    def test_max_bytes_to_index_default(self):
 
786
        vf = self.make_test_vf()
 
787
        gc = vf._make_group_compressor()
 
788
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
 
789
                         vf._max_bytes_to_index)
 
790
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
 
791
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
 
792
                             gc._delta_index._max_bytes_to_index)
 
793
 
 
794
    def test_max_bytes_to_index_in_config(self):
 
795
        c = config.GlobalConfig()
 
796
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
 
797
        vf = self.make_test_vf()
 
798
        gc = vf._make_group_compressor()
 
799
        self.assertEqual(10000, vf._max_bytes_to_index)
 
800
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
 
801
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
 
802
 
 
803
    def test_max_bytes_to_index_bad_config(self):
 
804
        c = config.GlobalConfig()
 
805
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
 
806
        vf = self.make_test_vf()
 
807
        # TODO: This is triggering a warning, we might want to trap and make
 
808
        #       sure it is readable.
 
809
        gc = vf._make_group_compressor()
 
810
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
 
811
                         vf._max_bytes_to_index)
 
812
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
 
813
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
 
814
                             gc._delta_index._max_bytes_to_index)
 
815
 
747
816
 
748
817
class StubGCVF(object):
749
818
    def __init__(self, canned_get_blocks=None):
819
888
                (read_memo1, groupcompress.GroupCompressBlock()),
820
889
                (read_memo2, groupcompress.GroupCompressBlock())])
821
890
        locations = {
822
 
            ('key1',): (read_memo1 + (None, None), None, None, None),
823
 
            ('key2',): (read_memo2 + (None, None), None, None, None)}
 
891
            ('key1',): (read_memo1 + (0, 0), None, None, None),
 
892
            ('key2',): (read_memo2 + (0, 0), None, None, None)}
824
893
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
825
894
        batcher.add_key(('key1',))
826
895
        batcher.add_key(('key2',))
840
909
        gcvf = StubGCVF()
841
910
        gcvf._group_cache[read_memo] = fake_block
842
911
        locations = {
843
 
            ('key',): (read_memo + (None, None), None, None, None)}
 
912
            ('key',): (read_memo + (0, 0), None, None, None)}
844
913
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
845
914
        batcher.add_key(('key',))
846
915
        self.assertEqual([], list(batcher.yield_factories()))
853
922
class TestLazyGroupCompress(tests.TestCaseWithTransport):
854
923
 
855
924
    _texts = {
856
 
        ('key1',): "this is a text\n"
857
 
                   "with a reasonable amount of compressible bytes\n"
858
 
                   "which can be shared between various other texts\n",
859
 
        ('key2',): "another text\n"
860
 
                   "with a reasonable amount of compressible bytes\n"
861
 
                   "which can be shared between various other texts\n",
862
 
        ('key3',): "yet another text which won't be extracted\n"
863
 
                   "with a reasonable amount of compressible bytes\n"
864
 
                   "which can be shared between various other texts\n",
865
 
        ('key4',): "this will be extracted\n"
866
 
                   "but references most of its bytes from\n"
867
 
                   "yet another text which won't be extracted\n"
868
 
                   "with a reasonable amount of compressible bytes\n"
869
 
                   "which can be shared between various other texts\n",
 
925
        (b'key1',): b"this is a text\n"
 
926
                   b"with a reasonable amount of compressible bytes\n"
 
927
                   b"which can be shared between various other texts\n",
 
928
        (b'key2',): b"another text\n"
 
929
                   b"with a reasonable amount of compressible bytes\n"
 
930
                   b"which can be shared between various other texts\n",
 
931
        (b'key3',): b"yet another text which won't be extracted\n"
 
932
                   b"with a reasonable amount of compressible bytes\n"
 
933
                   b"which can be shared between various other texts\n",
 
934
        (b'key4',): b"this will be extracted\n"
 
935
                   b"but references most of its bytes from\n"
 
936
                   b"yet another text which won't be extracted\n"
 
937
                   b"with a reasonable amount of compressible bytes\n"
 
938
                   b"which can be shared between various other texts\n",
870
939
    }
871
940
    def make_block(self, key_to_text):
872
941
        """Create a GroupCompressBlock, filling it with the given texts."""
875
944
        for key in sorted(key_to_text):
876
945
            compressor.compress(key, key_to_text[key], None)
877
946
        locs = dict((key, (start, end)) for key, (start, _, end, _)
878
 
                    in compressor.labels_deltas.iteritems())
 
947
                    in compressor.labels_deltas.items())
879
948
        block = compressor.flush()
880
949
        raw_bytes = block.to_bytes()
881
950
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
894
963
    def test_get_fulltexts(self):
895
964
        locations, block = self.make_block(self._texts)
896
965
        manager = groupcompress._LazyGroupContentManager(block)
897
 
        self.add_key_to_manager(('key1',), locations, block, manager)
898
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
966
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
967
        self.add_key_to_manager((b'key2',), locations, block, manager)
899
968
        result_order = []
900
969
        for record in manager.get_record_stream():
901
970
            result_order.append(record.key)
902
971
            text = self._texts[record.key]
903
972
            self.assertEqual(text, record.get_bytes_as('fulltext'))
904
 
        self.assertEqual([('key1',), ('key2',)], result_order)
 
973
        self.assertEqual([(b'key1',), (b'key2',)], result_order)
905
974
 
906
975
        # If we build the manager in the opposite order, we should get them
907
976
        # back in the opposite order
908
977
        manager = groupcompress._LazyGroupContentManager(block)
909
 
        self.add_key_to_manager(('key2',), locations, block, manager)
910
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
978
        self.add_key_to_manager((b'key2',), locations, block, manager)
 
979
        self.add_key_to_manager((b'key1',), locations, block, manager)
911
980
        result_order = []
912
981
        for record in manager.get_record_stream():
913
982
            result_order.append(record.key)
914
983
            text = self._texts[record.key]
915
984
            self.assertEqual(text, record.get_bytes_as('fulltext'))
916
 
        self.assertEqual([('key2',), ('key1',)], result_order)
 
985
        self.assertEqual([(b'key2',), (b'key1',)], result_order)
917
986
 
918
987
    def test__wire_bytes_no_keys(self):
919
988
        locations, block = self.make_block(self._texts)
923
992
        # We should have triggered a strip, since we aren't using any content
924
993
        stripped_block = manager._block.to_bytes()
925
994
        self.assertTrue(block_length > len(stripped_block))
926
 
        empty_z_header = zlib.compress('')
927
 
        self.assertEqual('groupcompress-block\n'
928
 
                         '8\n' # len(compress(''))
929
 
                         '0\n' # len('')
930
 
                         '%d\n'# compressed block len
931
 
                         '%s'  # zheader
932
 
                         '%s'  # block
 
995
        empty_z_header = zlib.compress(b'')
 
996
        self.assertEqual(b'groupcompress-block\n'
 
997
                         b'8\n' # len(compress(''))
 
998
                         b'0\n' # len('')
 
999
                         b'%d\n'# compressed block len
 
1000
                         b'%s'  # zheader
 
1001
                         b'%s'  # block
933
1002
                         % (len(stripped_block), empty_z_header,
934
1003
                            stripped_block),
935
1004
                         wire_bytes)
937
1006
    def test__wire_bytes(self):
938
1007
        locations, block = self.make_block(self._texts)
939
1008
        manager = groupcompress._LazyGroupContentManager(block)
940
 
        self.add_key_to_manager(('key1',), locations, block, manager)
941
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1009
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1010
        self.add_key_to_manager((b'key4',), locations, block, manager)
942
1011
        block_bytes = block.to_bytes()
943
1012
        wire_bytes = manager._wire_bytes()
944
1013
        (storage_kind, z_header_len, header_len,
945
 
         block_len, rest) = wire_bytes.split('\n', 4)
 
1014
         block_len, rest) = wire_bytes.split(b'\n', 4)
946
1015
        z_header_len = int(z_header_len)
947
1016
        header_len = int(header_len)
948
1017
        block_len = int(block_len)
949
 
        self.assertEqual('groupcompress-block', storage_kind)
 
1018
        self.assertEqual(b'groupcompress-block', storage_kind)
950
1019
        self.assertEqual(34, z_header_len)
951
1020
        self.assertEqual(26, header_len)
952
1021
        self.assertEqual(len(block_bytes), block_len)
953
1022
        z_header = rest[:z_header_len]
954
1023
        header = zlib.decompress(z_header)
955
1024
        self.assertEqual(header_len, len(header))
956
 
        entry1 = locations[('key1',)]
957
 
        entry4 = locations[('key4',)]
958
 
        self.assertEqualDiff('key1\n'
959
 
                             '\n'  # no parents
960
 
                             '%d\n' # start offset
961
 
                             '%d\n' # end offset
962
 
                             'key4\n'
963
 
                             '\n'
964
 
                             '%d\n'
965
 
                             '%d\n'
 
1025
        entry1 = locations[(b'key1',)]
 
1026
        entry4 = locations[(b'key4',)]
 
1027
        self.assertEqualDiff(b'key1\n'
 
1028
                             b'\n'  # no parents
 
1029
                             b'%d\n' # start offset
 
1030
                             b'%d\n' # end offset
 
1031
                             b'key4\n'
 
1032
                             b'\n'
 
1033
                             b'%d\n'
 
1034
                             b'%d\n'
966
1035
                             % (entry1[0], entry1[1],
967
1036
                                entry4[0], entry4[1]),
968
1037
                            header)
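Reading the message back is the mirror image: split on the first four newlines, decode the three lengths, then decompress the header, which lists one entry per key as the key, a parents line, a start offset and an end offset, exactly as the assertEqualDiff above expects. A self-contained sketch of that parse (offsets and payload are made up for illustration):

import zlib

header = (b'key1\n' b'\n' b'0\n' b'100\n'       # key, no parents, start, end
          b'key4\n' b'\n' b'100\n' b'175\n')
block = b'<placeholder block bytes>'
z_header_out = zlib.compress(header)
wire = (b'groupcompress-block\n'
        + b'%d\n' % len(z_header_out)
        + b'%d\n' % len(header)
        + b'%d\n' % len(block)
        + z_header_out + block)

storage_kind, z_len, h_len, b_len, rest = wire.split(b'\n', 4)
z_len, h_len, b_len = int(z_len), int(h_len), int(b_len)
assert storage_kind == b'groupcompress-block'
z_header = rest[:z_len]
assert len(zlib.decompress(z_header)) == h_len
assert zlib.decompress(z_header) == header
assert rest[z_len:] == block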
972
1041
    def test_from_bytes(self):
973
1042
        locations, block = self.make_block(self._texts)
974
1043
        manager = groupcompress._LazyGroupContentManager(block)
975
 
        self.add_key_to_manager(('key1',), locations, block, manager)
976
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1044
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1045
        self.add_key_to_manager((b'key4',), locations, block, manager)
977
1046
        wire_bytes = manager._wire_bytes()
978
 
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
 
1047
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
979
1048
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
980
1049
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
981
1050
        self.assertEqual(2, len(manager._factories))
985
1054
            result_order.append(record.key)
986
1055
            text = self._texts[record.key]
987
1056
            self.assertEqual(text, record.get_bytes_as('fulltext'))
988
 
        self.assertEqual([('key1',), ('key4',)], result_order)
 
1057
        self.assertEqual([(b'key1',), (b'key4',)], result_order)
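from_bytes() is the inverse of _wire_bytes(), so a manager rebuilt from the serialized form has to yield the same two keys in the same order. The same round-trip shape on a toy container (class and format invented for illustration, not the breezy wire format):

import zlib

class ToyBlock:
    """A pared-down container with the same to_bytes/from_bytes shape."""

    def __init__(self, payload):
        self.payload = payload

    def to_bytes(self):
        return b'toy-block\n' + zlib.compress(self.payload)

    @classmethod
    def from_bytes(cls, data):
        marker, _, body = data.partition(b'\n')
        assert marker == b'toy-block'
        return cls(zlib.decompress(body))

original = ToyBlock(b'key1 text\nkey4 text\n')
restored = ToyBlock.from_bytes(original.to_bytes())
assert restored.payload == original.payload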
989
1058
 
990
1059
    def test__check_rebuild_no_changes(self):
991
1060
        block, manager = self.make_block_and_full_manager(self._texts)
996
1065
        locations, block = self.make_block(self._texts)
997
1066
        manager = groupcompress._LazyGroupContentManager(block)
998
1067
        # Request just the first key, which should trigger a 'strip' action
999
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1068
        self.add_key_to_manager((b'key1',), locations, block, manager)
1000
1069
        manager._check_rebuild_block()
1001
1070
        self.assertIsNot(block, manager._block)
1002
1071
        self.assertTrue(block._content_length > manager._block._content_length)
1003
1072
        # We should be able to still get the content out of this block, though
1004
1073
        # it should only have 1 entry
1005
1074
        for record in manager.get_record_stream():
1006
 
            self.assertEqual(('key1',), record.key)
 
1075
            self.assertEqual((b'key1',), record.key)
1007
1076
            self.assertEqual(self._texts[record.key],
1008
1077
                             record.get_bytes_as('fulltext'))
1009
1078
 
1011
1080
        locations, block = self.make_block(self._texts)
1012
1081
        manager = groupcompress._LazyGroupContentManager(block)
1013
1082
        # Request a small key in the middle should trigger a 'rebuild'
1014
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1083
        self.add_key_to_manager((b'key4',), locations, block, manager)
1015
1084
        manager._check_rebuild_block()
1016
1085
        self.assertIsNot(block, manager._block)
1017
1086
        self.assertTrue(block._content_length > manager._block._content_length)
1018
1087
        for record in manager.get_record_stream():
1019
 
            self.assertEqual(('key4',), record.key)
 
1088
            self.assertEqual((b'key4',), record.key)
1020
1089
            self.assertEqual(self._texts[record.key],
1021
1090
                             record.get_bytes_as('fulltext'))
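Between them, the strip and rebuild tests cover the two ways _check_rebuild_block() can shrink a block: if the wanted texts sit at the front, the unused tail can simply be stripped, while a small text in the middle forces the block to be recompressed from scratch; either way the new block ends up smaller than the original. A toy version of that decision (the 50% threshold is invented; the real choice is made by _check_rebuild_action()):

def choose_rebuild_action(block_length, last_byte_used, total_bytes_used):
    """Toy strip-vs-rebuild decision; thresholds invented for illustration."""
    if last_byte_used == block_length:
        return None                       # everything is used: keep the block
    if total_bytes_used < last_byte_used * 0.5:
        return 'rebuild'                  # wanted bytes are a small part of the prefix
    return 'strip'                        # most of the prefix is wanted: just drop the tail

# Only the first text is wanted, and it sits at the front of the block.
assert choose_rebuild_action(1000, last_byte_used=200, total_bytes_used=200) == 'strip'
# Only a small text in the middle is wanted, so recompress.
assert choose_rebuild_action(1000, last_byte_used=800, total_bytes_used=60) == 'rebuild'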
1022
1091
 
 
1092
    def test_manager_default_compressor_settings(self):
 
1093
        locations, old_block = self.make_block(self._texts)
 
1094
        manager = groupcompress._LazyGroupContentManager(old_block)
 
1095
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1096
        # It doesn't greedily evaluate _max_bytes_to_index
 
1097
        self.assertIs(None, manager._compressor_settings)
 
1098
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
 
1099
                         manager._get_compressor_settings())
 
1100
 
 
1101
    def test_manager_custom_compressor_settings(self):
 
1102
        locations, old_block = self.make_block(self._texts)
 
1103
        called = []
 
1104
        def compressor_settings():
 
1105
            called.append('called')
 
1106
            return (10,)
 
1107
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1108
            get_compressor_settings=compressor_settings)
 
1109
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1110
        # It doesn't greedily evaluate compressor_settings
 
1111
        self.assertIs(None, manager._compressor_settings)
 
1112
        self.assertEqual((10,), manager._get_compressor_settings())
 
1113
        self.assertEqual((10,), manager._get_compressor_settings())
 
1114
        self.assertEqual((10,), manager._compressor_settings)
 
1115
        # Only called 1 time
 
1116
        self.assertEqual(['called'], called)
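The custom-settings test is checking call-once memoization: the callback passed as get_compressor_settings is not invoked at construction time, and it runs at most once no matter how often the settings are read. The same pattern in isolation (names shortened; the default tuple below is a stand-in, not gcvf._DEFAULT_COMPRESSOR_SETTINGS):

class LazySettings:
    def __init__(self, get_settings=None):
        self._get_settings = get_settings    # stored, deliberately not called yet
        self._settings = None

    def get(self):
        if self._settings is None:
            if self._get_settings is None:
                self._settings = ('default',)    # stand-in default
            else:
                self._settings = self._get_settings()
        return self._settings

calls = []
def settings():
    calls.append('called')
    return (10,)

lazy = LazySettings(settings)
assert lazy._settings is None            # nothing evaluated up front
assert lazy.get() == (10,)
assert lazy.get() == (10,)
assert calls == ['called']               # the callback ran exactly once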
 
1117
 
 
1118
    def test__rebuild_handles_compressor_settings(self):
 
1119
        if not isinstance(groupcompress.GroupCompressor,
 
1120
                          groupcompress.PyrexGroupCompressor):
 
1121
            raise tests.TestNotApplicable('pure-python compressor'
 
1122
                ' does not handle compressor_settings')
 
1123
        locations, old_block = self.make_block(self._texts)
 
1124
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1125
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
 
1126
        gc = manager._make_group_compressor()
 
1127
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
 
1128
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
 
1129
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
 
1130
        action, last_byte, total_bytes = manager._check_rebuild_action()
 
1131
        self.assertEqual('rebuild', action)
 
1132
        manager._rebuild_block()
 
1133
        new_block = manager._block
 
1134
        self.assertIsNot(old_block, new_block)
 
1135
        # Because of the new max_bytes_to_index, we do a poor job of
 
1136
        # rebuilding. This is a side-effect of the change, but at least it does
 
1137
        # show the setting had an effect.
 
1138
        self.assertTrue(old_block._content_length < new_block._content_length)
 
1139
 
1023
1140
    def test_check_is_well_utilized_all_keys(self):
1024
1141
        block, manager = self.make_block_and_full_manager(self._texts)
1025
1142
        self.assertFalse(manager.check_is_well_utilized())
1036
1153
 
1037
1154
    def test_check_is_well_utilized_mixed_keys(self):
1038
1155
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
 
1156
        f1k1 = (b'f1', b'k1')
 
1157
        f1k2 = (b'f1', b'k2')
 
1158
        f2k1 = (b'f2', b'k1')
 
1159
        f2k2 = (b'f2', b'k2')
 
1160
        texts[f1k1] = self._texts[(b'key1',)]
 
1161
        texts[f1k2] = self._texts[(b'key2',)]
 
1162
        texts[f2k1] = self._texts[(b'key3',)]
 
1163
        texts[f2k2] = self._texts[(b'key4',)]
1047
1164
        block, manager = self.make_block_and_full_manager(texts)
1048
1165
        self.assertFalse(manager.check_is_well_utilized())
1049
1166
        manager._full_enough_block_size = block._content_length
1057
1174
        locations, block = self.make_block(self._texts)
1058
1175
        manager = groupcompress._LazyGroupContentManager(block)
1059
1176
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
1177
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1178
        self.add_key_to_manager((b'key2',), locations, block, manager)
1062
1179
        # Just using the content from key1 and 2 is not enough to be considered
1063
1180
        # 'complete'
1064
1181
        self.assertFalse(manager.check_is_well_utilized())
1065
1182
        # However if we add key4, then we have enough, as we only require 75%
1066
1183
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1184
        self.add_key_to_manager((b'key4',), locations, block, manager)
1068
1185
        self.assertTrue(manager.check_is_well_utilized())
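Taken together, these utilization tests say a block only counts as well utilized when it is at least _full_enough_block_size and roughly 75% of its content was actually requested (the figure comes from the comment above). A toy check that mirrors just what the tests assert, not the real implementation:

def is_well_utilized(block_content_length, bytes_requested, full_enough_block_size):
    """Toy stand-in for check_is_well_utilized(); numbers invented for illustration."""
    if block_content_length < full_enough_block_size:
        return False                       # too small to count as a 'full' block
    return bytes_requested >= 0.75 * block_content_length

# Two of the four texts: not enough of the block is wanted.
assert not is_well_utilized(1000, bytes_requested=500, full_enough_block_size=1000)
# Adding the largest text pushes consumption past the 75% line.
assert is_well_utilized(1000, bytes_requested=800, full_enough_block_size=1000)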
 
1186
 
 
1187
 
 
1188
class Test_GCBuildDetails(tests.TestCase):
 
1189
 
 
1190
    def test_acts_like_tuple(self):
 
1191
        # _GCBuildDetails inlines some of the data that used to be spread out
 
1192
        # across a bunch of tuples
 
1193
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1194
            ('INDEX', 10, 20, 0, 5))
 
1195
        self.assertEqual(4, len(bd))
 
1196
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
 
1197
        self.assertEqual(None, bd[1]) # Compression Parent is always None
 
1198
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
 
1199
        self.assertEqual(('group', None), bd[3]) # Record details
 
1200
 
 
1201
    def test__repr__(self):
 
1202
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1203
            ('INDEX', 10, 20, 0, 5))
 
1204
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
 
1205
                         " (('parent1',), ('parent2',)))",
 
1206
                         repr(bd))
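The two _GCBuildDetails tests pin down the tuple protocol the class has to emulate: len() is 4, and indices 0-3 map to the index memo, the compression parent (always None), the parent keys, and the ('group', None) record details, with a matching repr. A stand-alone sketch of that shape (not the real class; attribute names are illustrative):

class ToyBuildDetails:
    """Acts like the 4-tuple (index_memo, None, parents, ('group', None))."""

    __slots__ = ('_parents', '_index_memo')

    def __init__(self, parents, index_memo):
        self._parents = parents
        self._index_memo = index_memo

    def __len__(self):
        return 4

    def __getitem__(self, offset):
        return (self._index_memo,          # 0: where the record lives
                None,                      # 1: compression parent is always None
                self._parents,             # 2: parent keys
                ('group', None),           # 3: record details
                )[offset]

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__,
                               self._index_memo, self._parents)

bd = ToyBuildDetails((('parent1',), ('parent2',)), ('INDEX', 10, 20, 0, 5))
assert len(bd) == 4
assert bd[0] == ('INDEX', 10, 20, 0, 5)
assert bd[1] is None
assert bd[3] == ('group', None)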