        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual((b'strange\ncommon long line\n'
                          b'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
                          b'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            b'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            b'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
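
    # Note: pop_last() is expected to discard exactly the chunks appended by
    # the most recent compress() call, which is why the stream compares equal
    # to the snapshot taken before ('key2',) was compressed.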

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
            b'common very very long line\n'
            b'plus more text\n', None)
        compressor.compress(('newlabel',),
            b'common very very long line\n'
            b'moredifferent\n', None)
        compressor.compress(('label3',),
            b'common very very long line\n'
            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
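
    # ratio() is (roughly) total uncompressed input over compressed output,
    # so ~1.9 means the three overlapping texts above occupy a bit more than
    # half their combined size once delta-compressed.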

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
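
    # A sketch of the delta encoding the expected bytes above assume (a
    # git-style copy/insert scheme): the stream carries b'd' plus the delta
    # length, then the target length, then commands. A command byte with the
    # high bit set is a copy whose low bits say which offset/length bytes
    # follow (0x91 = one offset byte and one length byte, so b'\x91\x0a\x2c'
    # copies 0x2c bytes from offset 0x0a); a command byte below 0x80 inserts
    # that many literal bytes, so b'\x0adifferent\n' inserts b'different\n'.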

    def test_three_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0b',
            # source and target length
            b'\x5f',
            # insert new
            b'\x03new',
            # Copy of first parent 'common' range
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
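
    # The copy offsets above count from the start of the compressor's output
    # buffer, which (as these tests assume) prefixes each stored text with a
    # one-byte fulltext marker and a length byte; that is why the common text
    # is found near offset 0x0a rather than at its offset in the raw text.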

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
            b'common very very long line\n'
            b'plus more text\n', None)
        compressor.compress(('newlabel',),
            b'common very very long line\n'
            b'moredifferent\n', None)
        compressor.compress(('label3',),
            b'common very very long line\n'
            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0c',
            # source and target length
            b'\x5f',
            # insert new
            b'\x04new\n',
            # Copy of first parent 'common' range
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
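
    # Note the Pyrex expectations differ slightly from the Python ones above
    # (b'\x91\x0a\x30' vs b'\x91\x09\x31'): the two matchers may pick
    # different, equally valid match boundaries, shifting a copied range by a
    # byte while still reproducing the same target text.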

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, b'')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            b'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()
        self.assertEqual(b'', block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          b'this is not a valid header')

    def test_from_bytes(self):
        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            b'gcb1z\n' # group compress block v1 plain
            b'%d\n' # Length of compressed content
            b'%d\n' # Length of uncompressed content
            b'%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = [b'this is some content\n',
                          b'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = b''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = b''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = (b'this is some content\n'
                   b'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content([b'this is some content\n'
                                 b'this content will be compressed\n'],
                                len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)

    def test_partial_decomp(self):
        content_chunks = []

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [((b'parent', ), b'2 78 2 10', ([],)),
             ((b'tip', ), b'2 78 2 10',
              ([(b'parent', ), (b'missing-parent', )],)),
            ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def grouped_stream(self, revision_ids, first_parents=()):
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                b'some content that is\n'
                b'identical except for\n'
                b'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)
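
    # grouped_stream chains parents: each yielded record names the previous
    # key as its parent, so one call's worth of records is expected to be
    # compressed into a single group (see the block-reuse tests below).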

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        block_bytes = {}
        stream = vf.get_record_stream(
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [(b'a',), (b'e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
        for r in 'efgh':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream(keys, 'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        for record in stream:

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        vf2.insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False))
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        # All of the records should be recombined into a single block
629
def test__insert_record_stream_no_reuse_block(self):
668
630
vf = self.make_test_vf(True, dir='source')
631
def grouped_stream(revision_ids, first_parents=()):
632
parents = first_parents
633
for revision_id in revision_ids:
635
record = versionedfile.FulltextContentFactory(
637
'some content that is\n'
638
'identical except for\n'
639
'revision_id:%s\n' % (revision_id,))
670
vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
643
vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
671
644
# Second group, e-h
672
vf.insert_record_stream(self.grouped_stream(
673
[b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
645
vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
646
first_parents=(('d',),)))
675
keys = [(r.encode(),) for r in 'abcdefgh']
676
self.assertEqual(8, len(list(
677
vf.get_record_stream(keys, 'unordered', False))))
648
self.assertEqual(8, len(list(vf.get_record_stream(
649
[(r,) for r in 'abcdefgh'],
650
'unordered', False))))
678
651
# Now copy the blocks into another vf, and ensure that the blocks are
679
652
# preserved without creating new entries
680
653
vf2 = self.make_test_vf(True, dir='target')
681
654
# ordering in 'groupcompress' order, should actually swap the groups in
682
655
# the target vf, but the groups themselves should not be disturbed.
683
656
list(vf2._insert_record_stream(vf.get_record_stream(
684
keys, 'groupcompress', False),
657
[(r,) for r in 'abcdefgh'], 'groupcompress', False),
685
658
reuse_blocks=False))
687
660
# After inserting with reuse_blocks=False, we should have everything in
688
661
# a single new block.
689
stream = vf2.get_record_stream(keys, 'groupcompress', False)
662
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
663
'groupcompress', False)
691
665
for record in stream:
692
666
if block is None:

        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([(b'missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
        self.assertEqual(
            frozenset([(b'parent-1',), (b'parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines((b'a',), (), [b'lines\n'])
        if a_parent:
            b_parents = ((b'a',),)
        else:
            b_parents = ()
        source.add_lines((b'b',), b_parents, [b'lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [(b'b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertContainsRe(
            warnings[0],
            r"^inconsistent details in skipped record: \(b?'b',\)"
            r" \(b?'42 32 0 8', \(\(\),\)\)"
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
def test_inconsistent_redundant_inserts_raises(self):
758
e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
729
e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
759
730
inconsistency_fatal=True)
760
self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
762
r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
763
r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
731
self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
733
" \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
" 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
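

# A minimal sketch (ours, not part of the API under test) of unpacking the
# 'gcb1z' framing that test_to_bytes/test_to_chunks verify: a b'gcb1z\n'
# magic line, two ASCII decimal length lines, then the zlib-compressed body.
def _parse_gcb1z_for_illustration(data):
    magic, z_len, c_len, rest = data.split(b'\n', 3)
    assert magic == b'gcb1z'
    content = zlib.decompress(rest[:int(z_len)])
    assert len(content) == int(c_len)
    return content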

class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
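
# For reference, the option exercised above can also be set by hand in the
# user's global configuration (assuming the usual bazaar.conf layout):
#   [DEFAULT]
#   bzr.groupcompress.max_bytes_to_index = 10000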

class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
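
# StubGCVF stands in for a GroupCompressVersionedFiles in the batching tests
# below; _BatchingBlockFetcher is assumed to touch only _group_cache and
# _get_blocks(), so this two-attribute stub is enough for whitebox testing.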

class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
853
class TestLazyGroupCompress(tests.TestCaseWithTransport):
928
(b'key1',): b"this is a text\n"
929
b"with a reasonable amount of compressible bytes\n"
930
b"which can be shared between various other texts\n",
931
(b'key2',): b"another text\n"
932
b"with a reasonable amount of compressible bytes\n"
933
b"which can be shared between various other texts\n",
934
(b'key3',): b"yet another text which won't be extracted\n"
935
b"with a reasonable amount of compressible bytes\n"
936
b"which can be shared between various other texts\n",
937
(b'key4',): b"this will be extracted\n"
938
b"but references most of its bytes from\n"
939
b"yet another text which won't be extracted\n"
940
b"with a reasonable amount of compressible bytes\n"
941
b"which can be shared between various other texts\n",
856
('key1',): "this is a text\n"
857
"with a reasonable amount of compressible bytes\n"
858
"which can be shared between various other texts\n",
859
('key2',): "another text\n"
860
"with a reasonable amount of compressible bytes\n"
861
"which can be shared between various other texts\n",
862
('key3',): "yet another text which won't be extracted\n"
863
"with a reasonable amount of compressible bytes\n"
864
"which can be shared between various other texts\n",
865
('key4',): "this will be extracted\n"
866
"but references most of its bytes from\n"
867
"yet another text which won't be extracted\n"
868
"with a reasonable amount of compressible bytes\n"
869
"which can be shared between various other texts\n",

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key1',), (b'key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key2',), (b'key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[(b'key1',)]
        entry4 = locations[(b'key4',)]
        self.assertEqualDiff(b'key1\n'
                             b'\n' # no parents
                             b'%d\n' # start offset
                             b'%d\n' # end offset
                             b'key4\n'
                             b'\n'
                             b'%d\n'
                             b'%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
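
    # To summarize the framing checked above, _wire_bytes() is assumed to be:
    # b'groupcompress-block\n', three ASCII decimal lengths (compressed
    # header, uncompressed header, block), a zlib-compressed header listing
    # each requested key with its parents and start/end offsets, and finally
    # the raw block bytes.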

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager((b'key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual((b'key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if not isinstance(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        self.add_key_to_manager((b'key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())

class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))