        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual((b'strange\ncommon long line\n'
                          b'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
                          b'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            b'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            b'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            b'common very very long line\n'
                            b'plus more text\n', None)
        compressor.compress(('newlabel',),
                            b'common very very long line\n'
                            b'moredifferent\n', None)
        compressor.compress(('label3',),
                            b'common very very long line\n'
                            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
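        # The texts above all share the 'common very very long line', so the
        # compressor should need noticeably less space than the raw input; the
        # 1.9 checked here is presumably that input-to-stored-bytes ratio for
        # this particular data set.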

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
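        # The expected bytes above assume the git-style copy/insert encoding
        # used by groupcompress deltas: a command byte with the high bit set
        # is a copy, and its low bits flag which offset and length bytes
        # follow (0x91 = 0b10010001, so one offset byte, 0x0a, and one length
        # byte, 0x2c); a command byte below 0x80, like the 0x0a before
        # 'different\n', means "insert the next 10 literal bytes".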

        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # Copy of first parent 'common' range
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            b'common very very long line\n'
                            b'plus more text\n', None)
        compressor.compress(('newlabel',),
                            b'common very very long line\n'
                            b'moredifferent\n', None)
        compressor.compress(('label3',),
                            b'common very very long line\n'
                            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, b'')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            b'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()
        self.assertEqual(b'', block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()  # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          b'this is not a valid header')

    def test_from_bytes(self):
        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
            b'gcb1z\n'  # group compress block v1 zlib
            b'%d\n'     # Length of compressed content
            b'%d\n'     # Length of uncompressed content
            b'%s'       # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
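        # As the bytes constructed above show, a serialised block is simply a
        # b'gcb1z\n' label line, the compressed and uncompressed lengths in
        # decimal (each newline-terminated), and then the zlib-compressed
        # content itself.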

    def test_to_chunks(self):
        content_chunks = [b'this is some content\n',
                          b'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = b''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = b''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'     # Length of compressed content
                           b'%d\n'     # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = (b'this is some content\n'
                   b'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'     # Length of compressed content
                           b'%d\n'     # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content([b'this is some content\n'
                                 b'this content will be compressed\n'],
                                len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)

    def test_partial_decomp(self):
        content_chunks = []

        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        keys = [record.key for record in vf.get_record_stream(
                    [(b'a',), (b'b',), (b'c',), (b'd',)],
                    'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [(b'b',), (b'a',), (b'd',), (b'c',)],
                    'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
                    [(b'a',), (b'b',), (b'c',), (b'd',)],
                    'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [(b'b',), (b'a',), (b'd',), (b'c',)],
                    'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def grouped_stream(self, revision_ids, first_parents=()):
        parents = first_parents
        for revision_id in revision_ids:
            record = versionedfile.FulltextContentFactory(
                b'some content that is\n'
                b'identical except for\n'
                b'revision_id:%s\n' % (revision_id,))

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        stream = vf.get_record_stream(
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
        for record in stream:
            if record.key in [(b'a',), (b'e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
        self.assertEqual(8, num_records)
        self.assertIs(block_bytes[key], block_bytes[(b'a',)])
        self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
        self.assertIs(block_bytes[key], block_bytes[(b'e',)])
        self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream(keys, 'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        for record in stream:

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        vf2.insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False))
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        # All of the records should be recombined into a single block

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        keys = [(r.encode(),) for r in 'abcdefgh']
        self.assertEqual(8, len(list(
            vf.get_record_stream(keys, 'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False),
            reuse_blocks=False))

        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        for record in stream:
            if block is None:

        self.do_inconsistent_inserts(inconsistency_fatal=False)
        trace.warning = _trace_warning
        self.assertContainsRe(
            r"^inconsistent details in skipped record: \(b?'b',\)"
            r" \(b?'42 32 0 8', \(\(\),\)\)"
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
        self.assertTrue(len(vf._group_cache) > 0)
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
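        # The 'bzr.groupcompress.max_bytes_to_index' option set via
        # GlobalConfig above bounds how much of each text gets added to the
        # delta index; an unparseable value is expected to fall back to
        # _DEFAULT_MAX_BYTES_TO_INDEX (after the warning noted in the TODO).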


class StubGCVF(object):

    def __init__(self, canned_get_blocks=None):


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
    }
def make_block(self, key_to_text):
872
941
"""Create a GroupCompressBlock, filling it with the given texts."""
894
963
def test_get_fulltexts(self):
895
964
locations, block = self.make_block(self._texts)
896
965
manager = groupcompress._LazyGroupContentManager(block)
897
self.add_key_to_manager(('key1',), locations, block, manager)
898
self.add_key_to_manager(('key2',), locations, block, manager)
966
self.add_key_to_manager((b'key1',), locations, block, manager)
967
self.add_key_to_manager((b'key2',), locations, block, manager)
899
968
result_order = []
900
969
for record in manager.get_record_stream():
901
970
result_order.append(record.key)
902
971
text = self._texts[record.key]
903
972
self.assertEqual(text, record.get_bytes_as('fulltext'))
904
self.assertEqual([('key1',), ('key2',)], result_order)
973
self.assertEqual([(b'key1',), (b'key2',)], result_order)
906
975
# If we build the manager in the opposite order, we should get them
907
976
# back in the opposite order
908
977
manager = groupcompress._LazyGroupContentManager(block)
909
self.add_key_to_manager(('key2',), locations, block, manager)
910
self.add_key_to_manager(('key1',), locations, block, manager)
978
self.add_key_to_manager((b'key2',), locations, block, manager)
979
self.add_key_to_manager((b'key1',), locations, block, manager)
911
980
result_order = []
912
981
for record in manager.get_record_stream():
913
982
result_order.append(record.key)
914
983
text = self._texts[record.key]
915
984
self.assertEqual(text, record.get_bytes_as('fulltext'))
916
self.assertEqual([('key2',), ('key1',)], result_order)
985
self.assertEqual([(b'key2',), (b'key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[(b'key1',)]
        entry4 = locations[(b'key4',)]
        self.assertEqualDiff(b'key1\n'
                             b'%d\n'  # start offset
                             b'%d\n'  # end offset
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
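        # The wire form picked apart here is: a b'groupcompress-block' label,
        # then the compressed header length, the raw header length and the
        # block length (one per line), followed by the zlib-compressed header
        # (one entry per requested key, giving its start and end offsets) and
        # finally the block bytes themselves.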

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager((b'key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual((b'key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if not isinstance(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it
        # does show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        self.add_key_to_manager((b'key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1])  # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3])  # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",