/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/tests/test_groupcompress.py

  • Committer: Jelmer Vernooij
  • Date: 2018-11-18 18:23:32 UTC
  • mto: This revision was merged to the branch mainline in revision 7197.
  • Revision ID: jelmer@jelmer.uk-20181118182332-viz1qvqese2mo9i6
Fix some more Bazaar references.

@@ -1 +1 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008-2011 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18 +18 @@

 import zlib

-from bzrlib import (
-    btree_index,
-    groupcompress,
+from .. import (
+    config,
     errors,
-    index as _mod_index,
     osutils,
     tests,
     trace,
+    )
+from ..bzr import (
+    btree_index,
+    groupcompress,
+    knit,
+    index as _mod_index,
     versionedfile,
     )
-from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
-
-
-def load_tests(standard_tests, module, loader):
-    """Parameterize tests for all versions of groupcompress."""
-    to_adapt, result = tests.split_suite_by_condition(
-        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
+from ..osutils import sha_string
+from .test__groupcompress import compiled_groupcompress_feature
+from .scenarios import load_tests_apply_scenarios
+
+
+def group_compress_implementation_scenarios():
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
     if compiled_groupcompress_feature.available():
         scenarios.append(('C',
-            {'compressor': groupcompress.PyrexGroupCompressor}))
-    return tests.multiply_tests(to_adapt, scenarios, result)
+                          {'compressor': groupcompress.PyrexGroupCompressor}))
+    return scenarios
+
+
+load_tests = load_tests_apply_scenarios


 class TestGroupCompressor(tests.TestCase):

     def _chunks_to_repr_lines(self, chunks):
-        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
+        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))

     def assertEqualDiffEncoded(self, expected, actual):
         """Compare the actual content to the expected content.
@@ -66 +71 @@
 class TestAllGroupCompressors(TestGroupCompressor):
     """Tests for GroupCompressor"""

-    compressor = None # Set by multiply_tests
+    scenarios = group_compress_implementation_scenarios()
+    compressor = None  # Set by scenario

     def test_empty_delta(self):
         compressor = self.compressor()
@@ -76 +82 @@
         # diff against NUKK
         compressor = self.compressor()
         sha1, start_point, end_point, _ = compressor.compress(('label',),
-            'strange\ncommon\n', None)
-        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
-        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
-        self.assertEqual(expected_lines, ''.join(compressor.chunks))
+                                                              b'strange\ncommon\n', None)
+        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
+        expected_lines = b'f\x0fstrange\ncommon\n'
+        self.assertEqual(expected_lines, b''.join(compressor.chunks))
         self.assertEqual(0, start_point)
-        self.assertEqual(sum(map(len, expected_lines)), end_point)
+        self.assertEqual(len(expected_lines), end_point)

     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
         sha1, start_point, end_point, kind = compressor.compress(('empty',),
-                                                                 '', None)
+                                                                 b'', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
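These hunks show the recurring pattern of the port: the payload handed to GroupCompressor.compress() (and to sha_string()) is now bytes rather than str, while the compressor keys here stay plain string tuples. A minimal usage sketch, assuming the breezy.bzr.groupcompress API exercised by these tests:

    from breezy.bzr import groupcompress

    compressor = groupcompress.GroupCompressor()
    # compress(key, data, expected_sha) -> (sha1, start_point, end_point, kind)
    sha1, start, end, kind = compressor.compress(
        ('label',), b'strange\ncommon\n', None)   # data is bytes, not str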
@@ -95 +101 @@
         self.assertEqual(0, compressor.endpoint)
         self.assertEqual([], compressor.chunks)
         # Even after adding some content
-        compressor.compress(('content',), 'some\nbytes\n', None)
+        compressor.compress(('content',), b'some\nbytes\n', None)
         self.assertTrue(compressor.endpoint > 0)
         sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-                                                                 '', None)
+                                                                 b'', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -109 +115 @@
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+                                                      b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         # get the first out
-        self.assertEqual(('strange\ncommon long line\n'
-                          'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual((b'strange\ncommon long line\n'
+                          b'that needs a 16 byte match\n', sha1_1),
                          compressor.extract(('label',)))
         # and the second
-        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
-                          'different\n', sha1_2),
+        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
+                          b'different\n', sha1_2),
                          compressor.extract(('newlabel',)))

     def test_pop_last(self):
         compressor = self.compressor()
         _, _, _, _ = compressor.compress(('key1',),
-            'some text\nfor the first entry\n', None)
+                                         b'some text\nfor the first entry\n', None)
         expected_lines = list(compressor.chunks)
         _, _, _, _ = compressor.compress(('key2',),
-            'some text\nfor the second entry\n', None)
+                                         b'some text\nfor the second entry\n', None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)

@@ -141 +147 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'strange\n'
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'new\n'
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+                                                                b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # source and target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n',  # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
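The expected_lines asserted above spell out the groupcompress delta encoding: b'd\x0f' announces a delta record with a 15-byte body, b'\x36' is the uncompressed target length (54 bytes), b'\x91\x0a\x2c' is a copy command (0x80 copy flag, 0x01 meaning one offset byte follows, 0x10 meaning one length byte follows) that copies 0x2c = 44 bytes starting at offset 0x0a = 10 of the bytes already written to the group, and b'\x0adifferent\n' inserts the 10 literal bytes 'different\n'. The decoder below is a rough sketch of how those two commands apply; it handles only the single-byte offset/length forms used here and is an illustration, not breezy's implementation:

    def decode_delta_sketch(source, delta):
        # delta starts with the target length (one byte here: 0x36 == 54),
        # followed by copy commands (high bit set) and insert commands.
        out = bytearray()
        pos = 1
        while pos < len(delta):
            cmd = delta[pos]
            pos += 1
            if cmd & 0x80:              # copy from the group written so far
                offset = length = 0
                if cmd & 0x01:          # one offset byte (full format allows more)
                    offset = delta[pos]; pos += 1
                if cmd & 0x10:          # one length byte (full format allows more)
                    length = delta[pos]; pos += 1
                out += source[offset:offset + length]
            else:                       # insert the next `cmd` literal bytes
                out += delta[pos:pos + cmd]
                pos += cmd
        return bytes(out)

    # The group so far: the fulltext marker 'f', its length byte (0x34 == 52),
    # then the first text.
    source = b'f\x34' + b'strange\ncommon long line\nthat needs a 16 byte match\n'
    delta = b'\x36' + b'\x91\x0a\x2c' + b'\x0adifferent\n'
    assert decode_delta_sketch(source, delta) == (
        b'common long line\nthat needs a 16 byte match\ndifferent\n')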
@@ -185 +191 @@
         # both parents.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+                                              b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+                                              b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
-            None)
+                                                                b'new\ncommon very very long line\nwith some extra text\n'
+                                                                b'different\nmoredifferent\nand then some more\n',
+                                                                None)
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
             sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0b',
+            b'd\x0b',
             # source and target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x03new',
+            b'\x03new',
             # Copy of first parent 'common' range
-            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
+            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -220 +226 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'strange\n'
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'new\n'
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+                                              b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+                                                                b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n',  # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -264 +270 @@
         # both parents.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+                                              b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+                                              b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
-            None)
+                                                                b'new\ncommon very very long line\nwith some extra text\n'
+                                                                b'different\nmoredifferent\nand then some more\n',
+                                                                None)
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
             sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0c',
+            b'd\x0c',
             # target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x04new\n',
+            b'\x04new\n',
             # Copy of first parent 'common' range
-            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
+            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -301 +307 @@
         for key in sorted(key_to_text):
             compressor.compress(key, key_to_text[key], None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
-                    in compressor.labels_deltas.iteritems())
+                    in compressor.labels_deltas.items())
         block = compressor.flush()
         raw_bytes = block.to_bytes()
         # Go through from_bytes(to_bytes()) so that we start with a compressed
@@ -310 +316 @@

     def test_from_empty_bytes(self):
         self.assertRaises(ValueError,
-                          groupcompress.GroupCompressBlock.from_bytes, '')
+                          groupcompress.GroupCompressBlock.from_bytes, b'')

     def test_from_minimal_bytes(self):
         block = groupcompress.GroupCompressBlock.from_bytes(
-            'gcb1z\n0\n0\n')
+            b'gcb1z\n0\n0\n')
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertIs(None, block._content)
-        self.assertEqual('', block._z_content)
+        self.assertEqual(b'', block._z_content)
         block._ensure_content()
-        self.assertEqual('', block._content)
-        self.assertEqual('', block._z_content)
-        block._ensure_content() # Ensure content is safe to call 2x
+        self.assertEqual(b'', block._content)
+        self.assertEqual(b'', block._z_content)
+        block._ensure_content()  # Ensure content is safe to call 2x

     def test_from_invalid(self):
         self.assertRaises(ValueError,
                           groupcompress.GroupCompressBlock.from_bytes,
-                          'this is not a valid header')
+                          b'this is not a valid header')

     def test_from_bytes(self):
-        content = ('a tiny bit of content\n')
+        content = (b'a tiny bit of content\n')
         z_content = zlib.compress(content)
         z_bytes = (
-            'gcb1z\n' # group compress block v1 plain
-            '%d\n' # Length of compressed content
-            '%d\n' # Length of uncompressed content
-            '%s'   # Compressed content
+            b'gcb1z\n'  # group compress block v1 plain
+            b'%d\n'  # Length of compressed content
+            b'%d\n'  # Length of uncompressed content
+            b'%s'   # Compressed content
             ) % (len(z_content), len(content), z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
             z_bytes)
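The 'gcb1z' literals switched to bytes in this hunk are the serialized block layout these tests pin down: a magic/version line, the compressed and uncompressed lengths as decimal ASCII lines, then the zlib-compressed body. A standalone sketch of that framing, offered as an illustration rather than a call into breezy:

    import zlib

    def build_block_sketch(content):
        z_content = zlib.compress(content)
        return b'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content), z_content)

    def parse_block_sketch(data):
        magic, z_len, length, rest = data.split(b'\n', 3)
        if magic != b'gcb1z':
            raise ValueError('not a group compress block')
        content = zlib.decompress(rest[:int(z_len)])
        if len(content) != int(length):
            raise ValueError('length mismatch')
        return content

    assert parse_block_sketch(build_block_sketch(b'a tiny bit of content\n')) == (
        b'a tiny bit of content\n')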
@@ -347 +353 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)

+    def test_to_chunks(self):
+        content_chunks = [b'this is some content\n',
+                          b'this content will be compressed\n']
+        content_len = sum(map(len, content_chunks))
+        content = b''.join(content_chunks)
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.set_chunked_content(content_chunks, content_len)
+        total_len, block_chunks = gcb.to_chunks()
+        block_bytes = b''.join(block_chunks)
+        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+        self.assertEqual(total_len, len(block_bytes))
+        self.assertEqual(gcb._content_length, content_len)
+        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
+                           b'%d\n'  # Length of compressed content
+                           b'%d\n'  # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        # The first chunk should be the header chunk. It is small, fixed size,
+        # and there is no compelling reason to split it up
+        self.assertEqual(expected_header, block_chunks[0])
+        self.assertStartsWith(block_bytes, expected_header)
+        remaining_bytes = block_bytes[len(expected_header):]
+        raw_bytes = zlib.decompress(remaining_bytes)
+        self.assertEqual(content, raw_bytes)
+
     def test_to_bytes(self):
-        content = ('this is some content\n'
-                   'this content will be compressed\n')
+        content = (b'this is some content\n'
+                   b'this content will be compressed\n')
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_content(content)
-        bytes = gcb.to_bytes()
+        data = gcb.to_bytes()
         self.assertEqual(gcb._z_content_length, len(gcb._z_content))
         self.assertEqual(gcb._content_length, len(content))
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
-                         ) % (gcb._z_content_length, gcb._content_length)
-        self.assertStartsWith(bytes, expected_header)
-        remaining_bytes = bytes[len(expected_header):]
+        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
+                           b'%d\n'  # Length of compressed content
+                           b'%d\n'  # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        self.assertStartsWith(data, expected_header)
+        remaining_bytes = data[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)

         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-                                 len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
+        gcb.set_chunked_content([b'this is some content\n'
+                                 b'this content will be compressed\n'],
+                                len(content))
+        old_data = data
+        data = gcb.to_bytes()
+        self.assertEqual(old_data, data)

     def test_partial_decomp(self):
         content_chunks = []
@@ -379 +409 @@
         # partial decompression to work with. Most auto-generated data
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
-        for i in xrange(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+        for i in range(2048):
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
             next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -424 +454 @@
         # partial decompression to work with. Most auto-generated data
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
-        for i in xrange(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+        for i in range(2048):
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
             next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -447 +477 @@
         self.assertIs(None, block._z_content_decompressor)

     def test__dump(self):
-        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
-        key_to_text = {('1',): dup_content + '1 unique\n',
-                       ('2',): dup_content + '2 extra special\n'}
+        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
+        key_to_text = {(b'1',): dup_content + b'1 unique\n',
+                       (b'2',): dup_content + b'2 extra special\n'}
         locs, block = self.make_block(key_to_text)
-        self.assertEqual([('f', len(key_to_text[('1',)])),
-                          ('d', 21, len(key_to_text[('2',)]),
-                           [('c', 2, len(dup_content)),
-                            ('i', len('2 extra special\n'), '')
-                           ]),
-                         ], block._dump())
+        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
+                          (b'd', 21, len(key_to_text[(b'2',)]),
+                           [(b'c', 2, len(dup_content)),
+                            (b'i', len(b'2 extra special\n'), b'')
+                            ]),
+                          ], block._dump())


 class TestCaseWithGroupCompressVersionedFiles(
@@ -467 +497 @@
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength,
-            inconsistency_fatal=inconsistency_fatal)(t)
+                                             delta=False, keylength=keylength,
+                                             inconsistency_fatal=inconsistency_fatal)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -487 +517 @@

     def make_g_index_missing_parent(self):
         graph_index = self.make_g_index('missing_parent', 1,
-            [(('parent', ), '2 78 2 10', ([],)),
-             (('tip', ), '2 78 2 10',
-              ([('parent', ), ('missing-parent', )],)),
-              ])
+                                        [((b'parent', ), b'2 78 2 10', ([],)),
+                                         ((b'tip', ), b'2 78 2 10',
+                                            ([(b'parent', ), (b'missing-parent', )],)),
+                                         ])
         return graph_index

     def test_get_record_stream_as_requested(self):
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
         vf = self.make_test_vf(False, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.add_lines(('b',), (), ['lines\n'])
-        vf.add_lines(('c',), (), ['lines\n'])
-        vf.add_lines(('d',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.add_lines((b'b',), (), [b'lines\n'])
+        vf.add_lines((b'c',), (), [b'lines\n'])
+        vf.add_lines((b'd',), (), [b'lines\n'])
         vf.writer.end()
         keys = [record.key for record in vf.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
-                    'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
-                    'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

         # It should work even after being repacked into another VF
         vf2 = self.make_test_vf(False, dir='target')
         vf2.insert_record_stream(vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
+            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
         vf2.writer.end()

         keys = [record.key for record in vf2.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
-                    'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf2.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
-                    'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
+
+    def test_get_record_stream_max_bytes_to_index_default(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.writer.end()
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
+                         record._manager._get_compressor_settings())
+
+    def test_get_record_stream_accesses_compressor_settings(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.writer.end()
+        vf._max_bytes_to_index = 1234
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(dict(max_bytes_to_index=1234),
+                         record._manager._get_compressor_settings())
+
+    @staticmethod
+    def grouped_stream(revision_ids, first_parents=()):
+        parents = first_parents
+        for revision_id in revision_ids:
+            key = (revision_id,)
+            record = versionedfile.FulltextContentFactory(
+                key, parents, None,
+                b'some content that is\n'
+                b'identical except for\n'
+                b'revision_id:%s\n' % (revision_id,))
+            yield record
+            parents = (key,)

     def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         block_bytes = {}
-        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                      'unordered', False)
+        stream = vf.get_record_stream(
+            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
         num_records = 0
         for record in stream:
-            if record.key in [('a',), ('e',)]:
+            if record.key in [(b'a',), (b'e',)]:
                 self.assertEqual('groupcompress-block', record.storage_kind)
             else:
                 self.assertEqual('groupcompress-block-ref',
@@ -558 +607 @@
             num_records += 1
         self.assertEqual(8, num_records)
         for r in 'abcd':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('a',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
         for r in 'efgh':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('e',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
+
         def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
+            for record in vf.get_record_stream(keys, 'groupcompress', False):
                 record._manager._full_enough_block_size = \
                     record._manager._block._content_length
                 yield record
-
+
         vf2.insert_record_stream(small_size_stream())
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         for record in stream:
@@ -590 +639 @@

     def test_insert_record_stream_packs_on_the_fly(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         # Now copy the blocks into another vf, and see that the
         # insert_record_stream rebuilt a new block on-the-fly because of
         # under-utilization
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+            keys, 'groupcompress', False))
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         # All of the records should be recombined into a single block
@@ -628 +666 @@

     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         vf.writer.end()
-        self.assertEqual(8, len(list(vf.get_record_stream(
-                                        [(r,) for r in 'abcdefgh'],
-                                        'unordered', False))))
+        keys = [(r.encode(),) for r in 'abcdefgh']
+        self.assertEqual(8, len(list(
+            vf.get_record_stream(keys, 'unordered', False))))
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         list(vf2._insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
+            keys, 'groupcompress', False),
             reuse_blocks=False))
         vf2.writer.end()
         # After inserting with reuse_blocks=False, we should have everything in
         # a single new block.
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         block = None
         for record in stream:
             if block is None:
@@ -672 +698 @@
         unvalidated = self.make_g_index_missing_parent()
         combined = _mod_index.CombinedGraphIndex([unvalidated])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            track_external_parent_refs=True)
         index.scan_unvalidated_index(unvalidated)
         self.assertEqual(
-            frozenset([('missing-parent',)]), index.get_missing_parents())
+            frozenset([(b'missing-parent',)]), index.get_missing_parents())

     def test_track_external_parent_refs(self):
         g_index = self.make_g_index('empty', 1, [])
         mod_index = btree_index.BTreeBuilder(1, 1)
         combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            add_callback=mod_index.add_nodes,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            add_callback=mod_index.add_nodes,
+                                            track_external_parent_refs=True)
         index.add_records([
-            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
+            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
         self.assertEqual(
-            frozenset([('parent-1',), ('parent-2',)]),
+            frozenset([(b'parent-1',), (b'parent-2',)]),
             index.get_missing_parents())

     def make_source_with_b(self, a_parent, path):
         source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
+        source.add_lines((b'a',), (), [b'lines\n'])
         if a_parent:
-            b_parents = (('a',),)
+            b_parents = ((b'a',),)
         else:
             b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
+        source.add_lines((b'b',), b_parents, [b'lines\n'])
         return source

     def do_inconsistent_inserts(self, inconsistency_fatal):
         target = self.make_test_vf(True, dir='target',
                                    inconsistency_fatal=inconsistency_fatal)
         for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
+            source = self.make_source_with_b(x == 1, 'source%s' % x)
             target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
+                [(b'b',)], 'unordered', False))

     def test_inconsistent_redundant_inserts_warn(self):
         """Should not insert a record that is already present."""
         warnings = []
+
         def warning(template, args):
             warnings.append(template % args)
         _trace_warning = trace.warning
@@ -721 +748 @@
             self.do_inconsistent_inserts(inconsistency_fatal=False)
         finally:
             trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
+        self.assertContainsRe(
+            "\n".join(warnings),
+            r"^inconsistent details in skipped record: \(b?'b',\)"
+            r" \(b?'42 32 0 8', \(\(\),\)\)"
+            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")

     def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
+        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                               inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
+        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
+                              r" in add_records:"
+                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
+                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")

     def test_clear_cache(self):
         vf = self.make_source_with_b(True, 'source')
         vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                            True):
             pass
         self.assertTrue(len(vf._group_cache) > 0)
@@ -744 +773 @@
         self.assertEqual(0, len(vf._group_cache))


+class TestGroupCompressConfig(tests.TestCaseWithTransport):
+
+    def make_test_vf(self):
+        t = self.get_transport('.')
+        t.ensure_base()
+        factory = groupcompress.make_pack_factory(graph=True,
+                                                  delta=False, keylength=1, inconsistency_fatal=True)
+        vf = factory(t)
+        self.addCleanup(groupcompress.cleanup_pack_group, vf)
+        return vf
+
+    def test_max_bytes_to_index_default(self):
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_in_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(10000, vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_bad_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
+        vf = self.make_test_vf()
+        # TODO: This is triggering a warning, we might want to trap and make
+        #       sure it is readable.
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+

 class StubGCVF(object):
     def __init__(self, canned_get_blocks=None):
         self._group_cache = {}
         self._canned_get_blocks = canned_get_blocks or []
+
     def _get_blocks(self, read_memos):
         return iter(self._canned_get_blocks)
-
+

 class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
     """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
+
     def test_add_key_new_read_memo(self):
         """Adding a key with an uncached read_memo new to this batch adds that
         read_memo to the list of memos to fetch.
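The new TestGroupCompressConfig class added in this hunk exercises the 'bzr.groupcompress.max_bytes_to_index' option, which caps how much of each source text the delta index will index. Mirroring the test setup (assumed usage, not a new API):

    from breezy import config

    c = config.GlobalConfig()
    c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
    # Pack factories created afterwards pick the value up through
    # _make_group_compressor(); an unparseable value (the 'boogah' case
    # above) falls back to _DEFAULT_MAX_BYTES_TO_INDEX.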
@@ -819 +891 @@
                 (read_memo1, groupcompress.GroupCompressBlock()),
                 (read_memo2, groupcompress.GroupCompressBlock())])
         locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
+            ('key1',): (read_memo1 + (0, 0), None, None, None),
+            ('key2',): (read_memo2 + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key1',))
         batcher.add_key(('key2',))
@@ -840 +912 @@
         gcvf = StubGCVF()
         gcvf._group_cache[read_memo] = fake_block
         locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
+            ('key',): (read_memo + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key',))
         self.assertEqual([], list(batcher.yield_factories()))
@@ -853 +925 @@
 class TestLazyGroupCompress(tests.TestCaseWithTransport):

     _texts = {
-        ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key4',): "this will be extracted\n"
-                   "but references most of its bytes from\n"
-                   "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+        (b'key1',): b"this is a text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key2',): b"another text\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key3',): b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
+        (b'key4',): b"this will be extracted\n"
+        b"but references most of its bytes from\n"
+        b"yet another text which won't be extracted\n"
+        b"with a reasonable amount of compressible bytes\n"
+        b"which can be shared between various other texts\n",
     }
+
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
         compressor = groupcompress.GroupCompressor()
@@ -875 +948 @@
         for key in sorted(key_to_text):
             compressor.compress(key, key_to_text[key], None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
-                    in compressor.labels_deltas.iteritems())
+                    in compressor.labels_deltas.items())
         block = compressor.flush()
         raw_bytes = block.to_bytes()
         return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
@@ -894 +967 @@
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key2',)], result_order)
+        self.assertEqual([(b'key1',), (b'key2',)], result_order)

         # If we build the manager in the opposite order, we should get them
         # back in the opposite order
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key2',), ('key1',)], result_order)
+        self.assertEqual([(b'key2',), (b'key1',)], result_order)

     def test__wire_bytes_no_keys(self):
         locations, block = self.make_block(self._texts)
@@ -923 +996 @@
         # We should have triggered a strip, since we aren't using any content
         stripped_block = manager._block.to_bytes()
         self.assertTrue(block_length > len(stripped_block))
-        empty_z_header = zlib.compress('')
-        self.assertEqual('groupcompress-block\n'
-                         '8\n' # len(compress(''))
-                         '0\n' # len('')
-                         '%d\n'# compressed block len
-                         '%s'  # zheader
-                         '%s'  # block
+        empty_z_header = zlib.compress(b'')
+        self.assertEqual(b'groupcompress-block\n'
+                         b'8\n'  # len(compress(''))
+                         b'0\n'  # len('')
+                         b'%d\n'  # compressed block len
+                         b'%s'  # zheader
+                         b'%s'  # block
                          % (len(stripped_block), empty_z_header,
                             stripped_block),
                          wire_bytes)
@@ -937 +1010 @@
     def test__wire_bytes(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         block_bytes = block.to_bytes()
         wire_bytes = manager._wire_bytes()
         (storage_kind, z_header_len, header_len,
-         block_len, rest) = wire_bytes.split('\n', 4)
+         block_len, rest) = wire_bytes.split(b'\n', 4)
         z_header_len = int(z_header_len)
947
1020
        header_len = int(header_len)
948
1021
        block_len = int(block_len)
949
 
        self.assertEqual('groupcompress-block', storage_kind)
 
1022
        self.assertEqual(b'groupcompress-block', storage_kind)
950
1023
        self.assertEqual(34, z_header_len)
951
1024
        self.assertEqual(26, header_len)
952
1025
        self.assertEqual(len(block_bytes), block_len)
953
1026
        z_header = rest[:z_header_len]
954
1027
        header = zlib.decompress(z_header)
955
1028
        self.assertEqual(header_len, len(header))
956
 
        entry1 = locations[('key1',)]
957
 
        entry4 = locations[('key4',)]
958
 
        self.assertEqualDiff('key1\n'
959
 
                             '\n'  # no parents
960
 
                             '%d\n' # start offset
961
 
                             '%d\n' # end offset
962
 
                             'key4\n'
963
 
                             '\n'
964
 
                             '%d\n'
965
 
                             '%d\n'
 
1029
        entry1 = locations[(b'key1',)]
 
1030
        entry4 = locations[(b'key4',)]
 
1031
        self.assertEqualDiff(b'key1\n'
 
1032
                             b'\n'  # no parents
 
1033
                             b'%d\n'  # start offset
 
1034
                             b'%d\n'  # end offset
 
1035
                             b'key4\n'
 
1036
                             b'\n'
 
1037
                             b'%d\n'
 
1038
                             b'%d\n'
966
1039
                             % (entry1[0], entry1[1],
967
1040
                                entry4[0], entry4[1]),
968
 
                            header)
 
1041
                             header)
969
1042
        z_block = rest[z_header_len:]
970
1043
        self.assertEqual(block_bytes, z_block)
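The two _wire_bytes tests above pin down the serialised layout: a b'groupcompress-block\n' marker, three decimal length lines (compressed header, uncompressed header, block), then the zlib-compressed per-key header followed by the block bytes verbatim. A hedged sketch of a reader for that layout, derived only from these assertions (split_gc_wire_bytes is a hypothetical helper, not part of breezy):

import zlib

def split_gc_wire_bytes(wire_bytes):
    # Layout asserted above: kind line, three decimal lengths, then the
    # zlib-compressed header immediately followed by the raw block bytes.
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split(b'\n', 4)
    if storage_kind != b'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % (storage_kind,))
    z_header_len = int(z_header_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != int(header_len):
        raise ValueError('header length mismatch')
    block_bytes = rest[z_header_len:]
    if len(block_bytes) != int(block_len):
        raise ValueError('block length mismatch')
    # header is b'key\n<parents>\n<start>\n<end>\n' repeated per entry.
    return header, block_bytes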
971
1044
 
972
1045
    def test_from_bytes(self):
973
1046
        locations, block = self.make_block(self._texts)
974
1047
        manager = groupcompress._LazyGroupContentManager(block)
975
 
        self.add_key_to_manager(('key1',), locations, block, manager)
976
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1048
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1049
        self.add_key_to_manager((b'key4',), locations, block, manager)
977
1050
        wire_bytes = manager._wire_bytes()
978
 
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
 
1051
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
979
1052
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
980
1053
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
981
1054
        self.assertEqual(2, len(manager._factories))
985
1058
            result_order.append(record.key)
986
1059
            text = self._texts[record.key]
987
1060
            self.assertEqual(text, record.get_bytes_as('fulltext'))
988
 
        self.assertEqual([('key1',), ('key4',)], result_order)
 
1061
        self.assertEqual([(b'key1',), (b'key4',)], result_order)
989
1062
 
990
1063
    def test__check_rebuild_no_changes(self):
991
1064
        block, manager = self.make_block_and_full_manager(self._texts)
996
1069
        locations, block = self.make_block(self._texts)
997
1070
        manager = groupcompress._LazyGroupContentManager(block)
998
1071
        # Request just the first key, which should trigger a 'strip' action
999
 
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1072
        self.add_key_to_manager((b'key1',), locations, block, manager)
1000
1073
        manager._check_rebuild_block()
1001
1074
        self.assertIsNot(block, manager._block)
1002
1075
        self.assertTrue(block._content_length > manager._block._content_length)
1003
1076
        # We should be able to still get the content out of this block, though
1004
1077
        # it should only have 1 entry
1005
1078
        for record in manager.get_record_stream():
1006
 
            self.assertEqual(('key1',), record.key)
 
1079
            self.assertEqual((b'key1',), record.key)
1007
1080
            self.assertEqual(self._texts[record.key],
1008
1081
                             record.get_bytes_as('fulltext'))
1009
1082
 
1011
1084
        locations, block = self.make_block(self._texts)
1012
1085
        manager = groupcompress._LazyGroupContentManager(block)
1013
1086
        # Request a small key in the middle should trigger a 'rebuild'
1014
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1087
        self.add_key_to_manager((b'key4',), locations, block, manager)
1015
1088
        manager._check_rebuild_block()
1016
1089
        self.assertIsNot(block, manager._block)
1017
1090
        self.assertTrue(block._content_length > manager._block._content_length)
1018
1091
        for record in manager.get_record_stream():
1019
 
            self.assertEqual(('key4',), record.key)
 
1092
            self.assertEqual((b'key4',), record.key)
1020
1093
            self.assertEqual(self._texts[record.key],
1021
1094
                             record.get_bytes_as('fulltext'))
1022
1095
 
 
1096
    def test_manager_default_compressor_settings(self):
 
1097
        locations, old_block = self.make_block(self._texts)
 
1098
        manager = groupcompress._LazyGroupContentManager(old_block)
 
1099
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1100
        # It doesn't greedily evaluate _max_bytes_to_index
 
1101
        self.assertIs(None, manager._compressor_settings)
 
1102
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
 
1103
                         manager._get_compressor_settings())
 
1104
 
 
1105
    def test_manager_custom_compressor_settings(self):
 
1106
        locations, old_block = self.make_block(self._texts)
 
1107
        called = []
 
1108
 
 
1109
        def compressor_settings():
 
1110
            called.append('called')
 
1111
            return (10,)
 
1112
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1113
                                                         get_compressor_settings=compressor_settings)
 
1114
        gcvf = groupcompress.GroupCompressVersionedFiles
 
1115
        # It doesn't greedily evaluate compressor_settings
 
1116
        self.assertIs(None, manager._compressor_settings)
 
1117
        self.assertEqual((10,), manager._get_compressor_settings())
 
1118
        self.assertEqual((10,), manager._get_compressor_settings())
 
1119
        self.assertEqual((10,), manager._compressor_settings)
 
1120
        # Only called 1 time
 
1121
        self.assertEqual(['called'], called)
 
1122
 
 
1123
    def test__rebuild_handles_compressor_settings(self):
 
1124
        if not isinstance(groupcompress.GroupCompressor,
 
1125
                          groupcompress.PyrexGroupCompressor):
 
1126
            raise tests.TestNotApplicable('pure-python compressor'
 
1127
                                          ' does not handle compressor_settings')
 
1128
        locations, old_block = self.make_block(self._texts)
 
1129
        manager = groupcompress._LazyGroupContentManager(old_block,
 
1130
                                                         get_compressor_settings=lambda: dict(max_bytes_to_index=32))
 
1131
        gc = manager._make_group_compressor()
 
1132
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
 
1133
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
 
1134
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
 
1135
        action, last_byte, total_bytes = manager._check_rebuild_action()
 
1136
        self.assertEqual('rebuild', action)
 
1137
        manager._rebuild_block()
 
1138
        new_block = manager._block
 
1139
        self.assertIsNot(old_block, new_block)
 
1140
        # Because of the new max_bytes_to_index, we do a poor job of
 
1141
        # rebuilding. This is a side-effect of the change, but at least it does
 
1142
        # show the setting had an effect.
 
1143
        self.assertTrue(old_block._content_length < new_block._content_length)
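The settings tests above show that _LazyGroupContentManager accepts an optional get_compressor_settings callback and evaluates it lazily, caching the result. A minimal illustration of wiring in custom settings, assuming old_block was built as in these tests; max_bytes_to_index is the only settings key exercised here, so no others are assumed:

manager = groupcompress._LazyGroupContentManager(
    old_block,
    get_compressor_settings=lambda: dict(max_bytes_to_index=32))
# Evaluation is lazy and cached: _compressor_settings stays None until
# _get_compressor_settings() is first called, and the callback runs once.
settings = manager._get_compressor_settings()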
 
1144
 
1023
1145
    def test_check_is_well_utilized_all_keys(self):
1024
1146
        block, manager = self.make_block_and_full_manager(self._texts)
1025
1147
        self.assertFalse(manager.check_is_well_utilized())
1036
1158
 
1037
1159
    def test_check_is_well_utilized_mixed_keys(self):
1038
1160
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
 
1161
        f1k1 = (b'f1', b'k1')
 
1162
        f1k2 = (b'f1', b'k2')
 
1163
        f2k1 = (b'f2', b'k1')
 
1164
        f2k2 = (b'f2', b'k2')
 
1165
        texts[f1k1] = self._texts[(b'key1',)]
 
1166
        texts[f1k2] = self._texts[(b'key2',)]
 
1167
        texts[f2k1] = self._texts[(b'key3',)]
 
1168
        texts[f2k2] = self._texts[(b'key4',)]
1047
1169
        block, manager = self.make_block_and_full_manager(texts)
1048
1170
        self.assertFalse(manager.check_is_well_utilized())
1049
1171
        manager._full_enough_block_size = block._content_length
1057
1179
        locations, block = self.make_block(self._texts)
1058
1180
        manager = groupcompress._LazyGroupContentManager(block)
1059
1181
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
 
1182
        self.add_key_to_manager((b'key1',), locations, block, manager)
 
1183
        self.add_key_to_manager((b'key2',), locations, block, manager)
1062
1184
        # Just using the content from key1 and 2 is not enough to be considered
1063
1185
        # 'complete'
1064
1186
        self.assertFalse(manager.check_is_well_utilized())
1065
1187
        # However if we add key4, then we have enough, as we only require 75%
1066
1188
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1189
        self.add_key_to_manager((b'key4',), locations, block, manager)
1068
1190
        self.assertTrue(manager.check_is_well_utilized())
 
1191
 
 
1192
 
 
1193
class Test_GCBuildDetails(tests.TestCase):
 
1194
 
 
1195
    def test_acts_like_tuple(self):
 
1196
        # _GCBuildDetails inlines some of the data that used to be spread out
 
1197
        # across a bunch of tuples
 
1198
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1199
                                           ('INDEX', 10, 20, 0, 5))
 
1200
        self.assertEqual(4, len(bd))
 
1201
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
 
1202
        self.assertEqual(None, bd[1])  # Compression Parent is always None
 
1203
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
 
1204
        self.assertEqual(('group', None), bd[3])  # Record details
 
1205
 
 
1206
    def test__repr__(self):
 
1207
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1208
                                           ('INDEX', 10, 20, 0, 5))
 
1209
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
 
1210
                         " (('parent1',), ('parent2',)))",
 
1211
                         repr(bd))
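For context on these last two tests: _GCBuildDetails packs into one object the data that used to be spread across several tuples, while keeping positional access working. A hedged reading of the indices, taken directly from the assertions above:

bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                   ('INDEX', 10, 20, 0, 5))
index_memo = bd[0]           # ('INDEX', 10, 20, 0, 5)
compression_parent = bd[1]   # always None for groupcompress
parents = bd[2]              # (('parent1',), ('parent2',))
record_details = bd[3]       # ('group', None)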