@@ -109 +115 @@
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         # get the first out
-        self.assertEqual(('strange\ncommon long line\n'
-                          'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual((b'strange\ncommon long line\n'
+                          b'that needs a 16 byte match\n', sha1_1),
                          compressor.extract(('label',)))
-        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
-                          'different\n', sha1_2),
+        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
+                          b'different\n', sha1_2),
                          compressor.extract(('newlabel',)))

     def test_pop_last(self):
         compressor = self.compressor()
         _, _, _, _ = compressor.compress(('key1',),
-            'some text\nfor the first entry\n', None)
+            b'some text\nfor the first entry\n', None)
         expected_lines = list(compressor.chunks)
         _, _, _, _ = compressor.compress(('key2',),
-            'some text\nfor the second entry\n', None)
+            b'some text\nfor the second entry\n', None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)
@@ -141 +147 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'common very very long line\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
             # source and target length
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n', # insert 10 bytes
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
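Note: the `b'\x91\x0a\x2c'` literal above is a groupcompress copy instruction. Assuming the git-style delta encoding that these expected bytes imply (high bit set means "copy", the low four bits select which offset bytes follow, the next three bits select which length bytes follow), a minimal decoder sketch looks like this; the function `decode_copy` and the standalone framing are illustrative, not part of the test suite:

```python
def decode_copy(delta, pos):
    """Decode one copy instruction starting at delta[pos].

    Sketch of the encoding suggested by the expected bytes in the tests:
    command byte 0x80|mask, where bits 0-3 select offset bytes and bits
    4-6 select length bytes, each stored little-endian after the command.
    """
    cmd = delta[pos]
    pos += 1
    assert cmd & 0x80, "not a copy instruction"
    offset = 0
    for bit in range(4):          # up to 4 offset bytes
        if cmd & (1 << bit):
            offset |= delta[pos] << (8 * bit)
            pos += 1
    length = 0
    for bit in range(3):          # up to 3 length bytes
        if cmd & (1 << (4 + bit)):
            length |= delta[pos] << (8 * bit)
            pos += 1
    return offset, length, pos

# 0x91 = 0b1001_0001: one offset byte (0x0a) and one length byte (0x2c),
# matching the "# copy, offset 0x0a, len 0x2c" comment in the test.
assert decode_copy(b'\x91\x0a\x2c', 0) == (0x0a, 0x2c, 3)
```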
@@ -186 +192 @@
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+            b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+            b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
+            b'new\ncommon very very long line\nwith some extra text\n'
+            b'different\nmoredifferent\nand then some more\n',
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
         expected_lines.extend([
             # 'delta', delta length
             # source and target length
             # Copy of first parent 'common' range
-            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
+            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -220 +226 @@
     def test_stats(self):
         compressor = self.compressor()
         compressor.compress(('label',),
-                            'common very very long line\n'
-                            'plus more text\n', None)
+                            b'common very very long line\n'
+                            b'plus more text\n', None)
         compressor.compress(('newlabel',),
-                            'common very very long line\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'moredifferent\n', None)
         compressor.compress(('label3',),
-                            'common very very long line\n'
-                            'moredifferent\n', None)
+                            b'common very very long line\n'
+                            b'moredifferent\n', None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)

     def test_two_nosha_delta(self):
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
         sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string('common long line\n'
-                                    'that needs a 16 byte match\n'
-                                    'different\n'), sha1_2)
+            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        self.assertEqual(sha_string(b'common long line\n'
+                                    b'that needs a 16 byte match\n'
+                                    b'different\n'), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
             # copy the line common
-            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
+            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
             # add the line different, and the trailing newline
-            '\x0adifferent\n', # insert 10 bytes
+            b'\x0adifferent\n', # insert 10 bytes
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -265 +271 @@
         compressor = self.compressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
-            'strange\ncommon very very long line\nwith some extra text\n', None)
+            b'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            'different\nmoredifferent\nand then some more\n', None)
+            b'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
         sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            'new\ncommon very very long line\nwith some extra text\n'
-            'different\nmoredifferent\nand then some more\n',
+            b'new\ncommon very very long line\nwith some extra text\n'
+            b'different\nmoredifferent\nand then some more\n',
         self.assertEqual(
-            sha_string('new\ncommon very very long line\nwith some extra text\n'
-                       'different\nmoredifferent\nand then some more\n'),
+            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
+                       b'different\nmoredifferent\nand then some more\n'),
         expected_lines.extend([
             # 'delta', delta length
             # Copy of first parent 'common' range
-            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
+            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
             # Copy of second parent 'different' range
-            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
+            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
         self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
@@ -311 +317 @@
     def test_from_empty_bytes(self):
         self.assertRaises(ValueError,
-                          groupcompress.GroupCompressBlock.from_bytes, '')
+                          groupcompress.GroupCompressBlock.from_bytes, b'')

     def test_from_minimal_bytes(self):
         block = groupcompress.GroupCompressBlock.from_bytes(
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertIs(None, block._content)
-        self.assertEqual('', block._z_content)
+        self.assertEqual(b'', block._z_content)
         block._ensure_content()
-        self.assertEqual('', block._content)
-        self.assertEqual('', block._z_content)
-        block._ensure_content() # Ensure content is safe to call 2x
+        self.assertEqual(b'', block._content)
+        self.assertEqual(b'', block._z_content)
+        block._ensure_content() # Ensure content is safe to call 2x

     def test_from_invalid(self):
         self.assertRaises(ValueError,
                           groupcompress.GroupCompressBlock.from_bytes,
-                          'this is not a valid header')
+                          b'this is not a valid header')

     def test_from_bytes(self):
-        content = ('a tiny bit of content\n')
+        content = (b'a tiny bit of content\n')
         z_content = zlib.compress(content)
-            'gcb1z\n' # group compress block v1 plain
-            '%d\n' # Length of compressed content
-            '%d\n' # Length of uncompressed content
-            '%s' # Compressed content
+            b'gcb1z\n' # group compress block v1 plain
+            b'%d\n' # Length of compressed content
+            b'%d\n' # Length of uncompressed content
+            b'%s' # Compressed content
             ) % (len(z_content), len(content), z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
@@ -347 +353 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)

+    def test_to_chunks(self):
+        content_chunks = [b'this is some content\n',
+                          b'this content will be compressed\n']
+        content_len = sum(map(len, content_chunks))
+        content = b''.join(content_chunks)
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.set_chunked_content(content_chunks, content_len)
+        total_len, block_chunks = gcb.to_chunks()
+        block_bytes = b''.join(block_chunks)
+        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+        self.assertEqual(total_len, len(block_bytes))
+        self.assertEqual(gcb._content_length, content_len)
+        expected_header = (b'gcb1z\n' # group compress block v1 zlib
+                           b'%d\n' # Length of compressed content
+                           b'%d\n' # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        # The first chunk should be the header chunk. It is small, fixed size,
+        # and there is no compelling reason to split it up
+        self.assertEqual(expected_header, block_chunks[0])
+        self.assertStartsWith(block_bytes, expected_header)
+        remaining_bytes = block_bytes[len(expected_header):]
+        raw_bytes = zlib.decompress(remaining_bytes)
+        self.assertEqual(content, raw_bytes)

     def test_to_bytes(self):
-        content = ('this is some content\n'
-                   'this content will be compressed\n')
+        content = (b'this is some content\n'
+                   b'this content will be compressed\n')
         gcb = groupcompress.GroupCompressBlock()
         gcb.set_content(content)
-        bytes = gcb.to_bytes()
+        data = gcb.to_bytes()
         self.assertEqual(gcb._z_content_length, len(gcb._z_content))
         self.assertEqual(gcb._content_length, len(content))
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
-                          ) % (gcb._z_content_length, gcb._content_length)
-        self.assertStartsWith(bytes, expected_header)
-        remaining_bytes = bytes[len(expected_header):]
+        expected_header = (b'gcb1z\n' # group compress block v1 zlib
+                           b'%d\n' # Length of compressed content
+                           b'%d\n' # Length of uncompressed content
+                           ) % (gcb._z_content_length, gcb._content_length)
+        self.assertStartsWith(data, expected_header)
+        remaining_bytes = data[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)

         # we should get the same results if using the chunked version
         gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
+        gcb.set_chunked_content([b'this is some content\n'
+                                 b'this content will be compressed\n'],
+        data = gcb.to_bytes()
+        self.assertEqual(old_data, data)

     def test_partial_decomp(self):
         content_chunks = []
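Note: `test_to_chunks` and `test_to_bytes` both spell out the serialized framing of a GroupCompressBlock: a `gcb1z\n` marker, the compressed and uncompressed lengths as decimal ASCII lines, then the zlib stream. A minimal reader following just what the tests assert might look like the sketch below; the helper name `read_gcb` is illustrative, not an API of the library:

```python
import zlib

def read_gcb(data):
    """Split a serialized block into its content, following the header layout
    asserted in test_to_bytes: b'gcb1z\n' + b'%d\n' (compressed length) +
    b'%d\n' (uncompressed length) + zlib-compressed content."""
    marker, z_len, c_len, body = data.split(b'\n', 3)
    if marker != b'gcb1z':
        raise ValueError('not a group compress block: %r' % marker)
    z_len, c_len = int(z_len), int(c_len)
    content = zlib.decompress(body[:z_len])
    if len(content) != c_len:
        raise ValueError('uncompressed length mismatch')
    return content

# Round-trip of the same shape the tests build by hand:
text = b'this is some content\nthis content will be compressed\n'
z = zlib.compress(text)
block = b'gcb1z\n%d\n%d\n%s' % (len(z), len(text), z)
assert read_gcb(block) == text
```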
@@ -488 +518 @@
     def make_g_index_missing_parent(self):
         graph_index = self.make_g_index('missing_parent', 1,
-            [(('parent', ), '2 78 2 10', ([],)),
-             (('tip', ), '2 78 2 10',
-              ([('parent', ), ('missing-parent', )],)),
+            [((b'parent', ), b'2 78 2 10', ([],)),
+             ((b'tip', ), b'2 78 2 10',
+              ([(b'parent', ), (b'missing-parent', )],)),
         return graph_index

     def test_get_record_stream_as_requested(self):
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
         vf = self.make_test_vf(False, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.add_lines(('b',), (), ['lines\n'])
-        vf.add_lines(('c',), (), ['lines\n'])
-        vf.add_lines(('d',), (), ['lines\n'])
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf.add_lines((b'b',), (), [b'lines\n'])
+        vf.add_lines((b'c',), (), [b'lines\n'])
+        vf.add_lines((b'd',), (), [b'lines\n'])

         keys = [record.key for record in vf.get_record_stream(
-            [('a',), ('b',), ('c',), ('d',)],
-            'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf.get_record_stream(
-            [('b',), ('a',), ('d',), ('c',)],
-            'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

         # It should work even after being repacked into another VF
         vf2 = self.make_test_vf(False, dir='target')
         vf2.insert_record_stream(vf.get_record_stream(
-            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
+            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))

         keys = [record.key for record in vf2.get_record_stream(
-            [('a',), ('b',), ('c',), ('d',)],
-            'as-requested', False)]
-        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+            [(b'a',), (b'b',), (b'c',), (b'd',)],
+            'as-requested', False)]
+        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
         keys = [record.key for record in vf2.get_record_stream(
-            [('b',), ('a',), ('d',), ('c',)],
-            'as-requested', False)]
-        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+            [(b'b',), (b'a',), (b'd',), (b'c',)],
+            'as-requested', False)]
+        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
+
+    def test_get_record_stream_max_bytes_to_index_default(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
+                         record._manager._get_compressor_settings())
+
+    def test_get_record_stream_accesses_compressor_settings(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines((b'a',), (), [b'lines\n'])
+        vf._max_bytes_to_index = 1234
+        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
+        self.assertEqual(dict(max_bytes_to_index=1234),
+                         record._manager._get_compressor_settings())
+
+    def grouped_stream(self, revision_ids, first_parents=()):
+        parents = first_parents
+        for revision_id in revision_ids:
+            record = versionedfile.FulltextContentFactory(
+                b'some content that is\n'
+                b'identical except for\n'
+                b'revision_id:%s\n' % (revision_id,))
     def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                record = versionedfile.FulltextContentFactory(
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))

-        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
+        stream = vf.get_record_stream(
+            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
         for record in stream:
-            if record.key in [('a',), ('e',)]:
+            if record.key in [(b'a',), (b'e',)]:
                 self.assertEqual('groupcompress-block', record.storage_kind)
                 self.assertEqual('groupcompress-block-ref',
         self.assertEqual(8, num_records)
-                self.assertIs(block_bytes[key], block_bytes[('a',)])
-                self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
+                self.assertIs(block_bytes[key], block_bytes[(b'a',)])
+                self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
-                self.assertIs(block_bytes[key], block_bytes[('e',)])
-                self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
+                self.assertIs(block_bytes[key], block_bytes[(b'e',)])
+                self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
+            for record in vf.get_record_stream(keys, 'groupcompress', False):
                 record._manager._full_enough_block_size = \
                     record._manager._block._content_length
         vf2.insert_record_stream(small_size_stream())
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         for record in stream:

     def test_insert_record_stream_packs_on_the_fly(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                record = versionedfile.FulltextContentFactory(
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
         # Now copy the blocks into another vf, and see that the
         # insert_record_stream rebuilt a new block on-the-fly because of
         # under-utilization
         vf2 = self.make_test_vf(True, dir='target')
+        keys = [(r.encode(),) for r in 'abcdefgh']
         vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+            keys, 'groupcompress', False))
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         # All of the records should be recombined into a single block
@@ -629 +667 @@
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                record = versionedfile.FulltextContentFactory(
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
         # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                first_parents=(('d',),)))
+        vf.insert_record_stream(self.grouped_stream(
+            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))

-        self.assertEqual(8, len(list(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'],
-            'unordered', False))))
+        keys = [(r.encode(),) for r in 'abcdefgh']
+        self.assertEqual(8, len(list(
+            vf.get_record_stream(keys, 'unordered', False))))
         # Now copy the blocks into another vf, and ensure that the blocks are
         # preserved without creating new entries
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
         list(vf2._insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
+            keys, 'groupcompress', False),
             reuse_blocks=False))

         # After inserting with reuse_blocks=False, we should have everything in
         # a single new block.
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
+        stream = vf2.get_record_stream(keys, 'groupcompress', False)
         for record in stream:
             if block is None:
@@ -672 +698 @@
         unvalidated = self.make_g_index_missing_parent()
         combined = _mod_index.CombinedGraphIndex([unvalidated])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            track_external_parent_refs=True)
         index.scan_unvalidated_index(unvalidated)
         self.assertEqual(
-            frozenset([('missing-parent',)]), index.get_missing_parents())
+            frozenset([(b'missing-parent',)]), index.get_missing_parents())

     def test_track_external_parent_refs(self):
         g_index = self.make_g_index('empty', 1, [])
         mod_index = btree_index.BTreeBuilder(1, 1)
         combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
         index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            add_callback=mod_index.add_nodes,
-            track_external_parent_refs=True)
+                                            is_locked=lambda: True, parents=True,
+                                            add_callback=mod_index.add_nodes,
+                                            track_external_parent_refs=True)
         index.add_records([
-            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
+            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
         self.assertEqual(
-            frozenset([('parent-1',), ('parent-2',)]),
+            frozenset([(b'parent-1',), (b'parent-2',)]),
             index.get_missing_parents())
     def make_source_with_b(self, a_parent, path):
         source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
+        source.add_lines((b'a',), (), [b'lines\n'])
-            b_parents = (('a',),)
+            b_parents = ((b'a',),)
-        source.add_lines(('b',), b_parents, ['lines\n'])
+        source.add_lines((b'b',), b_parents, [b'lines\n'])

     def do_inconsistent_inserts(self, inconsistency_fatal):
         target = self.make_test_vf(True, dir='target',
                                    inconsistency_fatal=inconsistency_fatal)
         for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
+            source = self.make_source_with_b(x == 1, 'source%s' % x)
             target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
+                [(b'b',)], 'unordered', False))

     def test_inconsistent_redundant_inserts_warn(self):
         """Should not insert a record that is already present."""
         def warning(template, args):
             warnings.append(template % args)
         _trace_warning = trace.warning
             self.do_inconsistent_inserts(inconsistency_fatal=False)
             trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
+        self.assertContainsRe(
+            r"^inconsistent details in skipped record: \(b?'b',\)"
+            r" \(b?'42 32 0 8', \(\(\),\)\)"
+            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")

     def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
+        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                               inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
+        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
+                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
+                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")

     def test_clear_cache(self):
         vf = self.make_source_with_b(True, 'source')
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
         self.assertTrue(len(vf._group_cache) > 0)
         self.assertEqual(0, len(vf._group_cache))
+
+class TestGroupCompressConfig(tests.TestCaseWithTransport):
+
+    def make_test_vf(self):
+        t = self.get_transport('.')
+        factory = groupcompress.make_pack_factory(graph=True,
+            delta=False, keylength=1, inconsistency_fatal=True)
+        self.addCleanup(groupcompress.cleanup_pack_group, vf)
+
+    def test_max_bytes_to_index_default(self):
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_in_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(10000, vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_bad_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
+        vf = self.make_test_vf()
+        # TODO: This is triggering a warning, we might want to trap and make
+        # sure it is readable.
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)

 class StubGCVF(object):
     def __init__(self, canned_get_blocks=None):
         self._group_cache = {}
         self._canned_get_blocks = canned_get_blocks or []

     def _get_blocks(self, read_memos):
         return iter(self._canned_get_blocks)


 class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
     """Simple whitebox unit tests for _BatchingBlockFetcher."""

     def test_add_key_new_read_memo(self):
         """Adding a key with an uncached read_memo new to this batch adds that
         read_memo to the list of memos to fetch.
@@ -853 +925 @@
 class TestLazyGroupCompress(tests.TestCaseWithTransport):

-        ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
-        ('key4',): "this will be extracted\n"
-                   "but references most of its bytes from\n"
-                   "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+        (b'key1',): b"this is a text\n"
+                    b"with a reasonable amount of compressible bytes\n"
+                    b"which can be shared between various other texts\n",
+        (b'key2',): b"another text\n"
+                    b"with a reasonable amount of compressible bytes\n"
+                    b"which can be shared between various other texts\n",
+        (b'key3',): b"yet another text which won't be extracted\n"
+                    b"with a reasonable amount of compressible bytes\n"
+                    b"which can be shared between various other texts\n",
+        (b'key4',): b"this will be extracted\n"
+                    b"but references most of its bytes from\n"
+                    b"yet another text which won't be extracted\n"
+                    b"with a reasonable amount of compressible bytes\n"
+                    b"which can be shared between various other texts\n",

     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
         compressor = groupcompress.GroupCompressor()
@@ -894 +967 @@
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key1',), ('key2',)], result_order)
+        self.assertEqual([(b'key1',), (b'key2',)], result_order)

         # If we build the manager in the opposite order, we should get them
         # back in the opposite order
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
         result_order = []
         for record in manager.get_record_stream():
             result_order.append(record.key)
             text = self._texts[record.key]
             self.assertEqual(text, record.get_bytes_as('fulltext'))
-        self.assertEqual([('key2',), ('key1',)], result_order)
+        self.assertEqual([(b'key2',), (b'key1',)], result_order)

     def test__wire_bytes_no_keys(self):
         locations, block = self.make_block(self._texts)
@@ -937 +1010 @@
     def test__wire_bytes(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         block_bytes = block.to_bytes()
         wire_bytes = manager._wire_bytes()
         (storage_kind, z_header_len, header_len,
-         block_len, rest) = wire_bytes.split('\n', 4)
+         block_len, rest) = wire_bytes.split(b'\n', 4)
         z_header_len = int(z_header_len)
         header_len = int(header_len)
         block_len = int(block_len)
-        self.assertEqual('groupcompress-block', storage_kind)
+        self.assertEqual(b'groupcompress-block', storage_kind)
         self.assertEqual(34, z_header_len)
         self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
         self.assertEqual(header_len, len(header))
-        entry1 = locations[('key1',)]
-        entry4 = locations[('key4',)]
-        self.assertEqualDiff('key1\n'
-                             '%d\n' # start offset
+        entry1 = locations[(b'key1',)]
+        entry4 = locations[(b'key4',)]
+        self.assertEqualDiff(b'key1\n'
+                             b'%d\n' # start offset
+                             b'%d\n' # end offset
                              % (entry1[0], entry1[1],
                                 entry4[0], entry4[1]),
         z_block = rest[z_header_len:]
         self.assertEqual(block_bytes, z_block)

     def test_from_bytes(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         wire_bytes = manager._wire_bytes()
-        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
         manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
         self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
         self.assertEqual(2, len(manager._factories))
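Note: `test__wire_bytes` documents the wire framing produced by `_LazyGroupContentManager._wire_bytes()`: a `groupcompress-block\n` storage-kind line, three decimal length lines (compressed header, uncompressed header, block), then the zlib-compressed header followed by the serialized block. A small parser sketch limited to what the test asserts; the function name `split_wire_bytes` is made up for illustration:

```python
import zlib

def split_wire_bytes(wire_bytes):
    """Split manager._wire_bytes() output into (header, block), mirroring the
    unpacking done in test__wire_bytes."""
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split(b'\n', 4)
    if storage_kind != b'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % storage_kind)
    z_header_len = int(z_header_len)
    header_len = int(header_len)
    block_len = int(block_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != header_len:
        raise ValueError('header length mismatch')
    block = rest[z_header_len:]
    if len(block) != block_len:
        raise ValueError('block length mismatch')
    return header, block
```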
@@ -1011 +1084 @@
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
         # Request a small key in the middle should trigger a 'rebuild'
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIsNot(block, manager._block)
         self.assertTrue(block._content_length > manager._block._content_length)
         for record in manager.get_record_stream():
-            self.assertEqual(('key4',), record.key)
+            self.assertEqual((b'key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_manager_default_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate _max_bytes_to_index
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
+                         manager._get_compressor_settings())
+
+    def test_manager_custom_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        def compressor_settings():
+            called.append('called')
+        manager = groupcompress._LazyGroupContentManager(old_block,
+            get_compressor_settings=compressor_settings)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate compressor_settings
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._compressor_settings)
+        # Only called 1 time
+        self.assertEqual(['called'], called)
+
+    def test__rebuild_handles_compressor_settings(self):
+        if not isinstance(groupcompress.GroupCompressor,
+                          groupcompress.PyrexGroupCompressor):
+            raise tests.TestNotApplicable('pure-python compressor'
+                ' does not handle compressor_settings')
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block,
+            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
+        gc = manager._make_group_compressor()
+        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
+        self.add_key_to_manager((b'key3',), locations, old_block, manager)
+        self.add_key_to_manager((b'key4',), locations, old_block, manager)
+        action, last_byte, total_bytes = manager._check_rebuild_action()
+        self.assertEqual('rebuild', action)
+        manager._rebuild_block()
+        new_block = manager._block
+        self.assertIsNot(old_block, new_block)
+        # Because of the new max_bytes_to_index, we do a poor job of
+        # rebuilding. This is a side-effect of the change, but at least it does
+        # show the setting had an effect.
+        self.assertTrue(old_block._content_length < new_block._content_length)

     def test_check_is_well_utilized_all_keys(self):
         block, manager = self.make_block_and_full_manager(self._texts)
         self.assertFalse(manager.check_is_well_utilized())
@@ -1057 +1179 @@
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
         manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager((b'key1',), locations, block, manager)
+        self.add_key_to_manager((b'key2',), locations, block, manager)
         # Just using the content from key1 and 2 is not enough to be considered
         self.assertFalse(manager.check_is_well_utilized())
         # However if we add key3, then we have enough, as we only require 75%
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.add_key_to_manager((b'key4',), locations, block, manager)
         self.assertTrue(manager.check_is_well_utilized())
+
+class Test_GCBuildDetails(tests.TestCase):
+
+    def test_acts_like_tuple(self):
+        # _GCBuildDetails inlines some of the data that used to be spread out
+        # across a bunch of tuples
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+            ('INDEX', 10, 20, 0, 5))
+        self.assertEqual(4, len(bd))
+        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
+        self.assertEqual(None, bd[1]) # Compression Parent is always None
+        self.assertEqual((('parent1',), ('parent2',)), bd[2])
+        self.assertEqual(('group', None), bd[3]) # Record details
+
+    def test__repr__(self):
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+            ('INDEX', 10, 20, 0, 5))
+        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
+                         " (('parent1',), ('parent2',)))",