/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/tests/test_groupcompress.py

  • Committer: Jelmer Vernooij
  • Date: 2018-04-02 00:52:27 UTC
  • mfrom: (6939 work)
  • mto: This revision was merged to the branch mainline in revision 7274.
  • Revision ID: jelmer@jelmer.uk-20180402005227-pecflp1mvdjrjqd6
Merge trunk.

@@ -53,7 +53,7 @@
 class TestGroupCompressor(tests.TestCase):
 
     def _chunks_to_repr_lines(self, chunks):
-        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
+        return '\n'.join(map(repr, b''.join(chunks).split(b'\n')))
 
     def assertEqualDiffEncoded(self, expected, actual):
         """Compare the actual content to the expected content.
@@ -82,18 +82,18 @@
         # diff against NUKK
         compressor = self.compressor()
         sha1, start_point, end_point, _ = compressor.compress(('label',),
-            'strange\ncommon\n', None)
-        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
-        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
-        self.assertEqual(expected_lines, ''.join(compressor.chunks))
+            b'strange\ncommon\n', None)
+        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
+        expected_lines = b'f\x0fstrange\ncommon\n'
+        self.assertEqual(expected_lines, b''.join(compressor.chunks))
         self.assertEqual(0, start_point)
-        self.assertEqual(sum(map(len, expected_lines)), end_point)
+        self.assertEqual(len(expected_lines), end_point)
 
     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
         sha1, start_point, end_point, kind = compressor.compress(('empty',),
-                                                                 '', None)
+                                                                 b'', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -101,10 +101,10 @@
         self.assertEqual(0, compressor.endpoint)
         self.assertEqual([], compressor.chunks)
         # Even after adding some content
-        compressor.compress(('content',), 'some\nbytes\n', None)
+        compressor.compress(('content',), b'some\nbytes\n', None)
         self.assertTrue(compressor.endpoint > 0)
         sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-                                                                 '', None)
+                                                                 b'', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
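
Review note on the end_point change at old line 90: iterating a str yields one-character strings, but iterating bytes yields ints, so sum(map(len, expected_lines)) stops working the moment expected_lines becomes a bytes literal; len(expected_lines) is the direct equivalent. The expected chunk itself spells out the fulltext record layout this test asserts: the marker b'f', a length byte (0x0f == 15 == len(b'strange\ncommon\n')), then the raw content. A minimal sketch of the iteration difference:

    sum(map(len, 'f\x0fhi'))    # 4: str items are 1-char strings
    len(b'f\x0fhi')             # 4: measure bytes directly
    sum(map(len, b'f\x0fhi'))   # TypeError: bytes items are ints
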
@@ -115,26 +115,26 @@
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         # get the first out
-        self.assertEqual(('strange\ncommon long line\n'
-                          'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual((b'strange\ncommon long line\n'
+                          b'that needs a 16 byte match\n', sha1_1),
                          compressor.extract(('label',)))
         # and the second
-        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
-                          'different\n', sha1_2),
+        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
+                          b'different\n', sha1_2),
                          compressor.extract(('newlabel',)))
 
     def test_pop_last(self):
         compressor = self.compressor()
         _, _, _, _ = compressor.compress(('key1',),
-            'some text\nfor the first entry\n', None)
+            b'some text\nfor the first entry\n', None)
         expected_lines = list(compressor.chunks)
         _, _, _, _ = compressor.compress(('key2',),
-            'some text\nfor the second entry\n', None)
+            b'some text\nfor the second entry\n', None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)
 
@@ -147,41 +147,41 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'strange\n'
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'new\n'
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # source and target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n', # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
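
Review note on the expected delta bytes above: after the b'd' marker and the one-byte delta length (0x0f == 15), the body is a target-length byte (0x36 == 54), one copy command and one insert command, in the git-style opcode layout the inline comments describe. In the copy command 0x91 == 0x80 | 0x10 | 0x01: the high bit means "copy", bit 0x01 says one offset byte follows (0x0a) and bit 0x10 says one length byte follows (0x2c); offset 0x0a is the first record's two header bytes plus len(b'strange\n'). An insert command is a byte below 0x80 giving the count of literal bytes that follow (0x0a == 10 == len(b'different\n')). A decoder sketch covering only the single-byte opcode shapes used in this test, not the full variable-length encoding:

    def decode_simple_delta(body, source):
        # body = target-length byte, then copy/insert commands
        out = []
        i = 1  # skip the single target-length byte (0x36 here)
        while i < len(body):
            cmd = body[i]; i += 1
            if cmd & 0x80:                  # copy a range out of `source`
                offset = body[i]; i += 1    # present because cmd & 0x01
                length = body[i]; i += 1    # present because cmd & 0x10
                out.append(source[offset:offset + length])
            else:                           # insert the next `cmd` literal bytes
                out.append(body[i:i + cmd]); i += cmd
        return b''.join(out)
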
@@ -191,29 +191,29 @@
         # both parents.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+            b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+            b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
+            b'new\ncommon very very long line\nwith some extra text\n'
+            b'different\nmoredifferent\nand then some more\n',
             None)
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
             sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0b',
+            b'd\x0b',
             # source and target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x03new',
+            b'\x03new',
             # Copy of first parent 'common' range
-            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
+            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -226,41 +226,41 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'strange\n'
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'strange\n'
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'new\n'
-                            'common very very long line\n'
-                            'plus more text\n'
-                            'different\n'
-                            'moredifferent\n', None)
+                            b'new\n'
+                            b'common very very long line\n'
+                            b'plus more text\n'
+                            b'different\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0f',
+            b'd\x0f',
             # target length
-            '\x36',
+            b'\x36',
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n', # insert 10 bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -270,29 +270,29 @@
         # both parents.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+            b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+            b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
+            b'new\ncommon very very long line\nwith some extra text\n'
+            b'different\nmoredifferent\nand then some more\n',
             None)
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
             sha1_3)
         expected_lines.extend([
             # 'delta', delta length
-            'd\x0c',
+            b'd\x0c',
             # target length
-            '\x5f'
+            b'\x5f'
             # insert new
-            '\x04new\n',
+            b'\x04new\n',
             # Copy of first parent 'common' range
-            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
+            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
             ])
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -316,32 +316,32 @@
 
     def test_from_empty_bytes(self):
         self.assertRaises(ValueError,
-                          groupcompress.GroupCompressBlock.from_bytes, '')
+                          groupcompress.GroupCompressBlock.from_bytes, b'')
 
     def test_from_minimal_bytes(self):
         block = groupcompress.GroupCompressBlock.from_bytes(
-            'gcb1z\n0\n0\n')
+            b'gcb1z\n0\n0\n')
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertIs(None, block._content)
-        self.assertEqual('', block._z_content)
+        self.assertEqual(b'', block._z_content)
         block._ensure_content()
-        self.assertEqual('', block._content)
-        self.assertEqual('', block._z_content)
+        self.assertEqual(b'', block._content)
+        self.assertEqual(b'', block._z_content)
         block._ensure_content() # Ensure content is safe to call 2x
 
     def test_from_invalid(self):
         self.assertRaises(ValueError,
                           groupcompress.GroupCompressBlock.from_bytes,
-                          'this is not a valid header')
+                          b'this is not a valid header')
 
     def test_from_bytes(self):
-        content = ('a tiny bit of content\n')
+        content = (b'a tiny bit of content\n')
         z_content = zlib.compress(content)
         z_bytes = (
-            'gcb1z\n' # group compress block v1 plain
-            '%d\n' # Length of compressed content
-            '%d\n' # Length of uncompressed content
-            '%s'   # Compressed content
+            b'gcb1z\n' # group compress block v1 plain
+            b'%d\n' # Length of compressed content
+            b'%d\n' # Length of uncompressed content
+            b'%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
             z_bytes)
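
Review note: the z_bytes literal in test_from_bytes doubles as documentation of the block container: a b'gcb1z\n' magic line, two ASCII-decimal length lines (compressed, then uncompressed), then the zlib stream. A hand-rolled equivalent of what the test builds, sketch only (the real writer is GroupCompressBlock.to_bytes()):

    import zlib

    def make_gcb1z(content):
        # magic, z-length, length, zlib data, as asserted in test_from_bytes
        z_content = zlib.compress(content)
        return b'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content), z_content)
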
@@ -354,20 +354,20 @@
         self.assertEqual(content, block._content)
 
     def test_to_chunks(self):
-        content_chunks = ['this is some content\n',
-                          'this content will be compressed\n']
+        content_chunks = [b'this is some content\n',
+                          b'this content will be compressed\n']
         content_len = sum(map(len, content_chunks))
-        content = ''.join(content_chunks)
+        content = b''.join(content_chunks)
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_chunked_content(content_chunks, content_len)
         total_len, block_chunks = gcb.to_chunks()
-        block_bytes = ''.join(block_chunks)
+        block_bytes = b''.join(block_chunks)
         self.assertEqual(gcb._z_content_length, len(gcb._z_content))
         self.assertEqual(total_len, len(block_bytes))
         self.assertEqual(gcb._content_length, content_len)
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
+        expected_header =(b'gcb1z\n' # group compress block v1 zlib
+                          b'%d\n' # Length of compressed content
+                          b'%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
         # The first chunk should be the header chunk. It is small, fixed size,
         # and there is no compelling reason to split it up
@@ -378,30 +378,30 @@
         self.assertEqual(content, raw_bytes)
 
     def test_to_bytes(self):
-        content = ('this is some content\n'
-                   'this content will be compressed\n')
+        content = (b'this is some content\n'
+                   b'this content will be compressed\n')
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_content(content)
-        bytes = gcb.to_bytes()
+        data = gcb.to_bytes()
         self.assertEqual(gcb._z_content_length, len(gcb._z_content))
         self.assertEqual(gcb._content_length, len(content))
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
+        expected_header =(b'gcb1z\n' # group compress block v1 zlib
+                          b'%d\n' # Length of compressed content
+                          b'%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
-        self.assertStartsWith(bytes, expected_header)
-        remaining_bytes = bytes[len(expected_header):]
+        self.assertStartsWith(data, expected_header)
+        remaining_bytes = data[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)
 
         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
+        gcb.set_chunked_content([b'this is some content\n'
+                                 b'this content will be compressed\n'],
                                  len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
+        old_data = data
+        data = gcb.to_bytes()
+        self.assertEqual(old_data, data)
 
     def test_partial_decomp(self):
         content_chunks = []
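
Review note on the bytes -> data rename in test_to_bytes: presumably done because bytes is a builtin type name in Python 3, and shadowing it is an easy source of confusion once the test is explicitly about byte strings:

    bytes = gcb.to_bytes()        # shadows the type, so afterwards...
    isinstance(header, bytes)     # ...raises TypeError: arg 2 is a value, not a type
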
@@ -410,11 +410,11 @@
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
         for i in range(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
             next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
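
Review note: b'%d\nThis is a bit of duplicate text\n' % (i,) relies on bytes %-interpolation, which Python 3 only regained in 3.5 (PEP 461); on 3.0-3.4 it raises TypeError. A purely illustrative spelling for older interpreters:

    next_content = ('%d\nThis is a bit of duplicate text\n' % (i,)).encode('ascii')
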
@@ -455,11 +455,11 @@
         # compresses a bit too well, we want a combination, so we combine a sha
         # hash with compressible data.
         for i in range(2048):
-            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+            next_content = b'%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
             next_sha1 = osutils.sha_string(next_content)
-            content_chunks.append(next_sha1 + '\n')
-        content = ''.join(content_chunks)
+            content_chunks.append(next_sha1 + b'\n')
+        content = b''.join(content_chunks)
         self.assertEqual(158634, len(content))
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
@@ -477,14 +477,14 @@
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
-        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
-        key_to_text = {('1',): dup_content + '1 unique\n',
-                       ('2',): dup_content + '2 extra special\n'}
+        dup_content = b'some duplicate content\nwhich is sufficiently long\n'
+        key_to_text = {(b'1',): dup_content + b'1 unique\n',
+                       (b'2',): dup_content + b'2 extra special\n'}
         locs, block = self.make_block(key_to_text)
-        self.assertEqual([('f', len(key_to_text[('1',)])),
-                          ('d', 21, len(key_to_text[('2',)]),
-                           [('c', 2, len(dup_content)),
-                            ('i', len('2 extra special\n'), '')
+        self.assertEqual([(b'f', len(key_to_text[(b'1',)])),
+                          (b'd', 21, len(key_to_text[(b'2',)]),
+                           [(b'c', 2, len(dup_content)),
+                            (b'i', len(b'2 extra special\n'), b'')
                            ]),
                          ], block._dump())
 
@@ -516,10 +516,10 @@
         return btree_index.BTreeGraphIndex(trans, name, size)
 
     def make_g_index_missing_parent(self):
-        graph_index = self.make_g_index('missing_parent', 1,
-            [(('parent', ), '2 78 2 10', ([],)),
-             (('tip', ), '2 78 2 10',
-              ([('parent', ), ('missing-parent', )],)),
+        graph_index = self.make_g_index(b'missing_parent', 1,
+            [((b'parent', ), b'2 78 2 10', ([],)),
+             ((b'tip', ), b'2 78 2 10',
+              ([(b'parent', ), (b'missing-parent', )],)),
               ])
         return graph_index
 
@@ -527,32 +527,32 @@
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
         vf = self.make_test_vf(False, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.add_lines(('b',), (), ['lines\n'])
-        vf.add_lines(('c',), (), ['lines\n'])
-        vf.add_lines(('d',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.add_lines((b'b',), (), [b'lines\n'])
+        vf.add_lines((b'c',), (), [b'lines\n'])
+        vf.add_lines((b'd',), (), [b'lines\n'])
         vf.writer.end()
         keys = [record.key for record in vf.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
+                    [(b'a',), (b'b',), (b'c',), (b'd',)],
                     'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
+                    [(b'b',), (b'a',), (b'd',), (b'c',)],
                     'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
         # It should work even after being repacked into another VF
         vf2 = self.make_test_vf(False, dir='target')
         vf2.insert_record_stream(vf.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
+                    [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
         vf2.writer.end()
 
         keys = [record.key for record in vf2.get_record_stream(
-                    [('a',), ('b',), ('c',), ('d',)],
+                    [(b'a',), (b'b',), (b'c',), (b'd',)],
                     'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf2.get_record_stream(
-                    [('b',), ('a',), ('d',), ('c',)],
+                    [(b'b',), (b'a',), (b'd',), (b'c',)],
                     'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
 
@@ -559,44 +559,46 @@
     def test_get_record_stream_max_bytes_to_index_default(self):
         vf = self.make_test_vf(True, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
         vf.writer.end()
-        record = next(vf.get_record_stream([('a',)], 'unordered', True))
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
         self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                          record._manager._get_compressor_settings())
 
     def test_get_record_stream_accesses_compressor_settings(self):
         vf = self.make_test_vf(True, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
         vf.writer.end()
         vf._max_bytes_to_index = 1234
-        record = next(vf.get_record_stream([('a',)], 'unordered', True))
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
         self.assertEqual(dict(max_bytes_to_index=1234),
                          record._manager._get_compressor_settings())
 
+    @staticmethod
+    def grouped_stream(revision_ids, first_parents=()):
+        parents = first_parents
+        for revision_id in revision_ids:
+            key = (revision_id,)
+            record = versionedfile.FulltextContentFactory(
+                key, parents, None,
+                b'some content that is\n'
+                b'identical except for\n'
+                b'revision_id:%s\n' % (revision_id,))
+            yield record
+            parents = (key,)
+
     def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         block_bytes = {}
-        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                      'unordered', False)
+        stream = vf.get_record_stream(
+            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
         num_records = 0
         for record in stream:
-            if record.key in [('a',), ('e',)]:
+            if record.key in [(b'a',), (b'e',)]:
                 self.assertEqual('groupcompress-block', record.storage_kind)
             else:
                 self.assertEqual('groupcompress-block-ref',
@@ -605,28 +607,27 @@
             num_records += 1
         self.assertEqual(8, num_records)
         for r in 'abcd':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('a',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
         for r in 'efgh':
-            key = (r,)
-            self.assertIs(block_bytes[key], block_bytes[('e',)])
-            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
+            key = (r.encode(),)
+            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
+            for record in vf.get_record_stream(keys, 'groupcompress', False):
                 record._manager._full_enough_block_size = \
                     record._manager._block._content_length
                 yield record
-                        
+
         vf2.insert_record_stream(small_size_stream())
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         for record in stream:
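
Review note: hoisting grouped_stream to a @staticmethod replaces three identical nested definitions (this test and the two below use the same generator). The keys = [(r.encode(),) for r in 'abcdefgh'] helper matters because str and bytes never compare equal in Python 3, so a str-keyed lookup into the bytes-keyed block_bytes dict would silently miss:

    ('a',) == (b'a',)             # False in Python 3
    block_bytes.get(('a',))       # None, even when (b'a',) is present
                                  # (block_bytes being the bytes-keyed dict above)
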
@@ -637,30 +638,19 @@
 
     def test_insert_record_stream_packs_on_the_fly(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         # Now copy the blocks into another vf, and see that the
         # insert_record_stream rebuilt a new block on-the-fly because of
         # under-utilization
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+            keys, 'groupcompress', False))
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         vf2.writer.end()
         num_records = 0
         # All of the records should be recombined into a single block
@@ -675,39 +665,27 @@
 
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
         # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         vf.writer.end()
-        self.assertEqual(8, len(list(vf.get_record_stream(
-                                        [(r,) for r in 'abcdefgh'],
-                                        'unordered', False))))
+        keys = [(r.encode(),) for r in 'abcdefgh']
+        self.assertEqual(8, len(list(
+            vf.get_record_stream(keys, 'unordered', False))))
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         list(vf2._insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
+            keys, 'groupcompress', False),
             reuse_blocks=False))
         vf2.writer.end()
         # After inserting with reuse_blocks=False, we should have everything in
         # a single new block.
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         block = None
         for record in stream:
             if block is None:
@@ -723,7 +701,7 @@
             track_external_parent_refs=True)
         index.scan_unvalidated_index(unvalidated)
         self.assertEqual(
-            frozenset([('missing-parent',)]), index.get_missing_parents())
+            frozenset([(b'missing-parent',)]), index.get_missing_parents())
 
     def test_track_external_parent_refs(self):
         g_index = self.make_g_index('empty', 1, [])
@@ -734,19 +712,19 @@
             add_callback=mod_index.add_nodes,
             track_external_parent_refs=True)
         index.add_records([
-            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
+            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
         self.assertEqual(
-            frozenset([('parent-1',), ('parent-2',)]),
+            frozenset([(b'parent-1',), (b'parent-2',)]),
             index.get_missing_parents())
 
     def make_source_with_b(self, a_parent, path):
         source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
+        source.add_lines((b'a',), (), [b'lines\n'])
         if a_parent:
-            b_parents = (('a',),)
+            b_parents = ((b'a',),)
         else:
             b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
+        source.add_lines((b'b',), b_parents, [b'lines\n'])
         return source
 
     def do_inconsistent_inserts(self, inconsistency_fatal):
@@ -755,7 +733,7 @@
         for x in range(2):
             source = self.make_source_with_b(x==1, 'source%s' % x)
             target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
+                [(b'b',)], 'unordered', False))
 
     def test_inconsistent_redundant_inserts_warn(self):
         """Should not insert a record that is already present."""
@@ -768,22 +746,24 @@
             self.do_inconsistent_inserts(inconsistency_fatal=False)
         finally:
             trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
+        self.assertContainsRe(
+            "\n".join(warnings),
+            r"^inconsistent details in skipped record: \(b?'b',\)"
+            r" \(b?'42 32 0 8', \(\(\),\)\)"
+            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
 
     def test_inconsistent_redundant_inserts_raises(self):
         e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                               inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
+        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
+                              r" in add_records:"
+                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
+                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
 
     def test_clear_cache(self):
         vf = self.make_source_with_b(True, 'source')
         vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                            True):
             pass
         self.assertTrue(len(vf._group_cache) > 0)
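
Review note: the warning and exception texts embed reprs of keys and index details, and those reprs differ between Python 2 (('b',)) and Python 3 ((b'b',)). Replacing the exact-match assertion with assertContainsRe and \(b?'b',\) accepts both spellings, and the new r'' prefixes keep the \( escapes from becoming invalid-escape-sequence warnings. For example:

    import re
    pat = r"\(b?'b',\)"
    re.search(pat, "('b',)")      # matches the Python 2 repr
    re.search(pat, "(b'b',)")     # matches the Python 3 repr
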
@@ -908,8 +888,8 @@
                 (read_memo1, groupcompress.GroupCompressBlock()),
                 (read_memo2, groupcompress.GroupCompressBlock())])
         locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
+            ('key1',): (read_memo1 + (0, 0), None, None, None),
+            ('key2',): (read_memo2 + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key1',))
         batcher.add_key(('key2',))
@@ -929,7 +909,7 @@
         gcvf = StubGCVF()
         gcvf._group_cache[read_memo] = fake_block
         locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
+            ('key',): (read_memo + (0, 0), None, None, None)}
         batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
         batcher.add_key(('key',))
         self.assertEqual([], list(batcher.yield_factories()))
@@ -942,20 +922,20 @@
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
-        ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key4',): "this will be extracted\n"
-                   "but references most of its bytes from\n"
-                   "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+        (b'key1',): b"this is a text\n"
+                   b"with a reasonable amount of compressible bytes\n"
+                   b"which can be shared between various other texts\n",
+        (b'key2',): b"another text\n"
+                   b"with a reasonable amount of compressible bytes\n"
+                   b"which can be shared between various other texts\n",
+        (b'key3',): b"yet another text which won't be extracted\n"
+                   b"with a reasonable amount of compressible bytes\n"
+                   b"which can be shared between various other texts\n",
+        (b'key4',): b"this will be extracted\n"
+                   b"but references most of its bytes from\n"
+                   b"yet another text which won't be extracted\n"
+                   b"with a reasonable amount of compressible bytes\n"
+                   b"which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -983,26 +963,26 @@
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key2',)], result_order)
+        self.assertEqual([(b'key1',), (b'key2',)], result_order)
 
         # If we build the manager in the opposite order, we should get them
         # back in the opposite order
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key2',), ('key1',)], result_order)
+        self.assertEqual([(b'key2',), (b'key1',)], result_order)
 
     def test__wire_bytes_no_keys(self):
         locations, block = self.make_block(self._texts)
@@ -1012,13 +992,13 @@
         # We should have triggered a strip, since we aren't using any content
         stripped_block = manager._block.to_bytes()
         self.assertTrue(block_length > len(stripped_block))
-        empty_z_header = zlib.compress('')
-        self.assertEqual('groupcompress-block\n'
-                         '8\n' # len(compress(''))
-                         '0\n' # len('')
-                         '%d\n'# compressed block len
-                         '%s'  # zheader
-                         '%s'  # block
+        empty_z_header = zlib.compress(b'')
+        self.assertEqual(b'groupcompress-block\n'
+                         b'8\n' # len(compress(''))
+                         b'0\n' # len('')
+                         b'%d\n'# compressed block len
+                         b'%s'  # zheader
+                         b'%s'  # block
                          % (len(stripped_block), empty_z_header,
                             stripped_block),
                          wire_bytes)
@@ -1026,32 +1006,32 @@
     def test__wire_bytes(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         block_bytes = block.to_bytes()
         wire_bytes = manager._wire_bytes()
         (storage_kind, z_header_len, header_len,
-         block_len, rest) = wire_bytes.split('\n', 4)
+         block_len, rest) = wire_bytes.split(b'\n', 4)
         z_header_len = int(z_header_len)
         header_len = int(header_len)
         block_len = int(block_len)
-        self.assertEqual('groupcompress-block', storage_kind)
+        self.assertEqual(b'groupcompress-block', storage_kind)
         self.assertEqual(34, z_header_len)
         self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
         self.assertEqual(header_len, len(header))
-        entry1 = locations[('key1',)]
-        entry4 = locations[('key4',)]
-        self.assertEqualDiff('key1\n'
-                             '\n'  # no parents
-                             '%d\n' # start offset
-                             '%d\n' # end offset
-                             'key4\n'
-                             '\n'
-                             '%d\n'
-                             '%d\n'
+        entry1 = locations[(b'key1',)]
+        entry4 = locations[(b'key4',)]
+        self.assertEqualDiff(b'key1\n'
+                             b'\n'  # no parents
+                             b'%d\n' # start offset
+                             b'%d\n' # end offset
+                             b'key4\n'
+                             b'\n'
+                             b'%d\n'
+                             b'%d\n'
                              % (entry1[0], entry1[1],
                                 entry4[0], entry4[1]),
                             header)
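
Review note: only the split delimiter needed a b prefix in this hunk because int() already accepts ASCII digits from bytes in Python 3, so the decimal length fields parse unchanged:

    int(b'34')                    # 34
    b'a\nb'.split('\n')           # TypeError: a bytes-like object is required
    b'a\nb'.split(b'\n')          # [b'a', b'b']
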
@@ -1061,10 +1041,10 @@
     def test_from_bytes(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         wire_bytes = manager._wire_bytes()
-        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
         manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
         self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
         self.assertEqual(2, len(manager._factories))
@@ -1074,7 +1054,7 @@
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key4',)], result_order)
+        self.assertEqual([(b'key1',), (b'key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
         block, manager = self.make_block_and_full_manager(self._texts)
@@ -1085,14 +1065,14 @@
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
         # Request just the first key, which should trigger a 'strip' action
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIsNot(block, manager._block)
         self.assertTrue(block._content_length > manager._block._content_length)
         # We should be able to still get the content out of this block, though
         # it should only have 1 entry
         for record in manager.get_record_stream():
-            self.assertEqual(('key1',), record.key)
+            self.assertEqual((b'key1',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
 
@@ -1100,12 +1080,12 @@
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
         # Request a small key in the middle should trigger a 'rebuild'
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIsNot(block, manager._block)
         self.assertTrue(block._content_length > manager._block._content_length)
         for record in manager.get_record_stream():
-            self.assertEqual(('key4',), record.key)
+            self.assertEqual((b'key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
 
@@ -1145,8 +1125,8 @@
             get_compressor_settings=lambda: dict(max_bytes_to_index=32))
         gc = manager._make_group_compressor()
         self.assertEqual(32, gc._delta_index._max_bytes_to_index)
-        self.add_key_to_manager(('key3',), locations, old_block, manager)
-        self.add_key_to_manager(('key4',), locations, old_block, manager)
+        self.add_key_to_manager((b'key3',), locations, old_block, manager)
+        self.add_key_to_manager((b'key4',), locations, old_block, manager)
         action, last_byte, total_bytes = manager._check_rebuild_action()
         self.assertEqual('rebuild', action)
         manager._rebuild_block()
@@ -1173,14 +1153,14 @@
 
     def test_check_is_well_utilized_mixed_keys(self):
         texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
+        f1k1 = (b'f1', b'k1')
+        f1k2 = (b'f1', b'k2')
+        f2k1 = (b'f2', b'k1')
+        f2k2 = (b'f2', b'k2')
+        texts[f1k1] = self._texts[(b'key1',)]
+        texts[f1k2] = self._texts[(b'key2',)]
+        texts[f2k1] = self._texts[(b'key3',)]
+        texts[f2k2] = self._texts[(b'key4',)]
         block, manager = self.make_block_and_full_manager(texts)
         self.assertFalse(manager.check_is_well_utilized())
         manager._full_enough_block_size = block._content_length
@@ -1194,14 +1174,14 @@
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
         manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
         # Just using the content from key1 and 2 is not enough to be considered
         # 'complete'
         self.assertFalse(manager.check_is_well_utilized())
         # However if we add key3, then we have enough, as we only require 75%
         # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         self.assertTrue(manager.check_is_well_utilized())
 
 
@@ -1224,3 +1204,3 @@
         self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                          " (('parent1',), ('parent2',)))",
                          repr(bd))