    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        text = b'strange\ncommon\n'
        sha1, start_point, end_point, _ = compressor.compress(
            ('label',), [text], len(text), None)
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
        expected_lines = b'f\x0fstrange\ncommon\n'
        self.assertEqual(expected_lines, b''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(len(expected_lines), end_point)
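        # Reading the expected stream above: the first text in a group is
        # stored as a fulltext record, i.e. a b'f' marker, a length byte
        # (0x0f == 15, the length of b'strange\ncommon\n'), then the raw
        # content itself; there is nothing earlier in the group to delta
        # against, so no delta instructions appear.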

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(
            ('empty',), [], 0, None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)

        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, _, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        # get the first out
        self.assertEqual(([b'strange\ncommon long line\n'
                           b'that needs a 16 byte match\n'], sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
                           b'different\n'], sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        text = b'some text\nfor the first entry\n'
        _, _, _, _ = compressor.compress(
            ('key1',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'some text\nfor the second entry\n'
        _, _, _, _ = compressor.compress(
            ('key2',), [text], len(text), None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)

    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [
            b'common very very long line\n',
            b'plus more text\n',
            b'different\n',
            b'moredifferent\n']
        compressor.compress(
            ('newlabel',),
            chunks, sum(map(len, chunks)), None)
        chunks = [
            b'new\n',
            b'common very very long line\n',
            b'plus more text\n',
            b'different\n',
            b'moredifferent\n']
        compressor.compress(
            ('label3',), chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n',  # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
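        # The expected stream above follows the delta encoding used in these
        # tests: a copy instruction (here b'\x91' followed by an offset byte
        # and a length byte) re-reads a range already present in the group,
        # while an insert instruction is a length byte below 0x80 followed by
        # that many literal bytes (b'\x0a' plus the ten bytes of
        # 'different\n').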

        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # Copy of first parent 'common' range
            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
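        # Note that the two copy instructions above pull from different
        # earlier texts in the same group: offset 0x09 apparently points into
        # the 'label' content and offset 0x3c into the 'newlabel' content,
        # which is why the comments call them the first and second parent
        # ranges; a single delta may reference several earlier inserts.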

    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [
            b'common very very long line\n',
            b'plus more text\n',
            b'different\n',
            b'moredifferent\n']
        compressor.compress(
            ('newlabel',), chunks, sum(map(len, chunks)), None)
        chunks = [
            b'new\n',
            b'common very very long line\n',
            b'plus more text\n',
            b'different\n',
            b'moredifferent\n']
        compressor.compress(
            ('label3',),
            chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n',  # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, b'')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            b'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()
        self.assertEqual(b'', block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()  # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          b'this is not a valid header')

    def test_from_bytes(self):
        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            b'gcb1z\n'  # group compress block v1 zlib
            b'%d\n'  # Length of compressed content
            b'%d\n'  # Length of uncompressed content
            b'%s'  # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = [b'this is some content\n',
                          b'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = b''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = b''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'  # Length of compressed content
                           b'%d\n'  # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
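        # For reference, the on-disk layout exercised here is simply the
        # b'gcb1z\n' marker, the compressed and uncompressed lengths as
        # decimal ASCII (one per line), and then a single zlib stream holding
        # the concatenated group content.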

    def test_to_bytes(self):
        content = (b'this is some content\n'
                   b'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'  # Length of compressed content
                           b'%d\n'  # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content([b'this is some content\n'
                                 b'this content will be compressed\n'],
                                len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)

    def test_partial_decomp(self):
        content_chunks = []

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [((b'parent', ), b'2 78 2 10', ([],)),
             ((b'tip', ), b'2 78 2 10',
              ([(b'parent', ), (b'missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def grouped_stream(self, revision_ids, first_parents=()):
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                b'some content that is\n'
                b'identical except for\n'
                b'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)
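    # The helper above yields FulltextContentFactory records whose texts
    # differ only in the revision id, so consecutive records compress very
    # well against one another; the tests below rely on that to get all of
    # a-d (and e-h) into shared group-compress blocks.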

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        block_bytes = {}
        stream = vf.get_record_stream(
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [(b'a',), (b'e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
        for r in 'efgh':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.

        def small_size_stream():
            for record in vf.get_record_stream(keys, 'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        for record in stream:
656
def test_insert_record_stream_packs_on_the_fly(self):
592
657
vf = self.make_test_vf(True, dir='source')
593
def grouped_stream(revision_ids, first_parents=()):
594
parents = first_parents
595
for revision_id in revision_ids:
597
record = versionedfile.FulltextContentFactory(
599
'some content that is\n'
600
'identical except for\n'
601
'revision_id:%s\n' % (revision_id,))
605
vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
659
vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
606
660
# Second group, e-h
607
vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
first_parents=(('d',),)))
661
vf.insert_record_stream(self.grouped_stream(
662
[b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
609
663
# Now copy the blocks into another vf, and see that the
610
664
# insert_record_stream rebuilt a new block on-the-fly because of
611
665
# under-utilization
612
666
vf2 = self.make_test_vf(True, dir='target')
667
keys = [(r.encode(),) for r in 'abcdefgh']
613
668
vf2.insert_record_stream(vf.get_record_stream(
614
[(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
'groupcompress', False)
669
keys, 'groupcompress', False))
670
stream = vf2.get_record_stream(keys, 'groupcompress', False)
619
673
# All of the records should be recombined into a single block
629
683
def test__insert_record_stream_no_reuse_block(self):
630
684
vf = self.make_test_vf(True, dir='source')
631
def grouped_stream(revision_ids, first_parents=()):
632
parents = first_parents
633
for revision_id in revision_ids:
635
record = versionedfile.FulltextContentFactory(
637
'some content that is\n'
638
'identical except for\n'
639
'revision_id:%s\n' % (revision_id,))
643
vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
686
vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
644
687
# Second group, e-h
645
vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
646
first_parents=(('d',),)))
688
vf.insert_record_stream(self.grouped_stream(
689
[b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
648
self.assertEqual(8, len(list(vf.get_record_stream(
649
[(r,) for r in 'abcdefgh'],
650
'unordered', False))))
691
keys = [(r.encode(),) for r in 'abcdefgh']
692
self.assertEqual(8, len(list(
693
vf.get_record_stream(keys, 'unordered', False))))
651
694
# Now copy the blocks into another vf, and ensure that the blocks are
652
695
# preserved without creating new entries
653
696
vf2 = self.make_test_vf(True, dir='target')
654
697
# ordering in 'groupcompress' order, should actually swap the groups in
655
698
# the target vf, but the groups themselves should not be disturbed.
656
699
list(vf2._insert_record_stream(vf.get_record_stream(
657
[(r,) for r in 'abcdefgh'], 'groupcompress', False),
700
keys, 'groupcompress', False),
658
701
reuse_blocks=False))
660
703
# After inserting with reuse_blocks=False, we should have everything in
661
704
# a single new block.
662
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
663
'groupcompress', False)
705
stream = vf2.get_record_stream(keys, 'groupcompress', False)
665
707
for record in stream:
666
708
if block is None:
672
714
unvalidated = self.make_g_index_missing_parent()
673
715
combined = _mod_index.CombinedGraphIndex([unvalidated])
674
716
index = groupcompress._GCGraphIndex(combined,
675
is_locked=lambda: True, parents=True,
676
track_external_parent_refs=True)
717
is_locked=lambda: True, parents=True,
718
track_external_parent_refs=True)
677
719
index.scan_unvalidated_index(unvalidated)
678
720
self.assertEqual(
679
frozenset([('missing-parent',)]), index.get_missing_parents())
721
frozenset([(b'missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
        self.assertEqual(
            frozenset([(b'parent-1',), (b'parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines((b'a',), (), [b'lines\n'])
        if a_parent:
            b_parents = ((b'a',),)
        else:
            b_parents = ()
        source.add_lines((b'b',), b_parents, [b'lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [(b'b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []

        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertContainsRe(
            warnings[0],
            r"^inconsistent details in skipped record: \(b?'b',\)"
            r" \(b?'42 32 0 8', \(\(\),\)\)"
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))

class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        # sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
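    # Taken together, these tests pin down the behaviour of the
    # 'bzr.groupcompress.max_bytes_to_index' option: when unset it falls back
    # to _DEFAULT_MAX_BYTES_TO_INDEX, a numeric value is passed through to
    # the delta index, and an unparseable value is ignored (with a warning)
    # in favour of the default.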

class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.

class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(
                key, [key_to_text[key]], len(key_to_text[key]), None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
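    # make_block returns both the rebuilt block and a 'locations' dict that
    # maps each key to its (start, end) byte range inside the group; the
    # tests below pass those ranges to add_key_to_manager to register
    # individual texts with a _LazyGroupContentManager.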

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key1',), (b'key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key2',), (b'key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[(b'key1',)]
        entry4 = locations[(b'key4',)]
        self.assertEqualDiff(b'key1\n'
                             b'\n'  # no parents
                             b'%d\n'  # start offset
                             b'%d\n'  # end offset
                             b'key4\n'
                             b'\n'
                             b'%d\n'
                             b'%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
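        # So the wire format checked here is a 'groupcompress-block\n' label,
        # three decimal lengths (compressed header, uncompressed header, and
        # block), a zlib-compressed header describing each requested key's
        # byte range, and finally the block bytes themselves, passed through
        # unchanged.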

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager((b'key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual((b'key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []

        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if not isinstance(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                                          ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
1196
locations, block = self.make_block(self._texts)
1058
1197
manager = groupcompress._LazyGroupContentManager(block)
1059
1198
manager._full_enough_block_size = block._content_length
1060
self.add_key_to_manager(('key1',), locations, block, manager)
1061
self.add_key_to_manager(('key2',), locations, block, manager)
1199
self.add_key_to_manager((b'key1',), locations, block, manager)
1200
self.add_key_to_manager((b'key2',), locations, block, manager)
1062
1201
# Just using the content from key1 and 2 is not enough to be considered
1064
1203
self.assertFalse(manager.check_is_well_utilized())
1065
1204
# However if we add key3, then we have enough, as we only require 75%
1067
self.add_key_to_manager(('key4',), locations, block, manager)
1206
self.add_key_to_manager((b'key4',), locations, block, manager)
1068
1207
self.assertTrue(manager.check_is_well_utilized())

class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1])  # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3])  # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",