    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        text = b'strange\ncommon\n'
        sha1, start_point, end_point, _ = compressor.compress(
            ('label',), [text], len(text), None)
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
        expected_lines = b'f\x0fstrange\ncommon\n'
        self.assertEqual(expected_lines, b''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(len(expected_lines), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(
            ('empty',), [], 0, None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
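    # Note on the raw group encoding checked above: a fulltext record is
    # stored as b'f', a base-128 length prefix, then the text itself, so
    # b'f\x0fstrange\ncommon\n' is 'f' + length 15 + the 15 content bytes
    # (inferred from the expected values above).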
    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, _, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        # get the first out
        self.assertEqual(([b'strange\ncommon long line\n'
                           b'that needs a 16 byte match\n'], sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
                           b'different\n'], sha1_2),
                         compressor.extract(('newlabel',)))
    def test_pop_last(self):
        compressor = self.compressor()
        text = b'some text\nfor the first entry\n'
        _, _, _, _ = compressor.compress(
            ('key1',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'some text\nfor the second entry\n'
        _, _, _, _ = compressor.compress(
            ('key2',), [text], len(text), None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('newlabel',), chunks, sum(map(len, chunks)), None)
        chunks = [b'new\n',
                  b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('label3',), chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
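    # ratio() here is the total input bytes divided by the bytes actually
    # emitted into the group, so three heavily overlapping texts land at
    # roughly 1.9:1 (approximate reading of the assertion above).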
    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
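    # The expected delta above spells out the group-compress delta format:
    # a 'd' record carries the encoded delta length, then the target text
    # length, then instructions.  A byte with the high bit set (such as
    # \x91) is a copy whose low bits say which offset/length bytes follow;
    # a byte in 1..127 inserts that many literal bytes.  So b'\x91\x0a\x2c'
    # copies 0x2c bytes starting at offset 0x0a of the group, and
    # b'\x0adifferent\n' inserts the 10 literal bytes of 'different\n'.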
    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0b',
            # source and target length
            b'\x5f'
            # insert new
            b'\x03new',
            # Copy of first parent 'common' range
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
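    # The copy offsets above address the group built so far, not any single
    # parent text: the third text pulls one range out of the first entry and
    # another out of the second, which is what makes a shared group cheaper
    # than chaining per-parent deltas.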
    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('newlabel',), chunks, sum(map(len, chunks)), None)
        chunks = [b'new\n',
                  b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('label3',), chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0c',
            # source and target length
            b'\x5f'
            # insert new
            b'\x04new\n',
            # Copy of first parent 'common' range
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
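    # Note that the expected delta here differs slightly from the earlier
    # implementation's (insert b'new\n' then copy 0x30 bytes from offset
    # 0x0a, versus insert b'new' and copy 0x31 bytes from offset 0x09); both
    # expand to the same target text, the two compressors just choose
    # different but equally valid instruction splits.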
    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, b'')
    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            b'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()
        self.assertEqual(b'', block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()  # Ensure content is safe to call 2x
    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          b'this is not a valid header')
    def test_from_bytes(self):
        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            b'gcb1z\n' # group compress block v1 zlib
            b'%d\n' # Length of compressed content
            b'%d\n' # Length of uncompressed content
            b'%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
    def test_to_chunks(self):
        content_chunks = [b'this is some content\n',
                          b'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = b''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = b''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
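    # The serialised block format exercised above is simply
    #   b'gcb1z\n' <compressed length> b'\n' <uncompressed length> b'\n' <zlib data>
    # so a reader can split off three newline-terminated fields and
    # zlib-decompress the remainder, roughly:
    #   magic, z_len, c_len, rest = block_bytes.split(b'\n', 3)
    #   assert zlib.decompress(rest) == content  # len(content) == int(c_len)
    # (illustrative sketch only, using the names from this test).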
    def test_to_bytes(self):
        content = (b'this is some content\n'
                   b'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content([b'this is some content\n'
                                 b'this content will be compressed\n'],
                                len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)
    def test_partial_decomp(self):
        content_chunks = []
    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [((b'parent', ), b'2 78 2 10', ([],)),
             ((b'tip', ), b'2 78 2 10',
              ([(b'parent', ), (b'missing-parent', )],)),
             ])
        return graph_index
    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())
    def grouped_stream(self, revision_ids, first_parents=()):
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                b'some content that is\n'
                b'identical except for\n'
                b'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)
    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        stream = vf.get_record_stream(
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
        block_bytes = {}
        num_records = 0
        for record in stream:
            if record.key in [(b'a',), (b'e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
        for r in 'efgh':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.

        def small_size_stream():
            for record in vf.get_record_stream(keys, 'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        vf2.insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False))
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        # All of the records should be recombined into a single block
    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        keys = [(r.encode(),) for r in 'abcdefgh']
        self.assertEqual(8, len(list(
            vf.get_record_stream(keys, 'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False),
            reuse_blocks=False))

        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([(b'missing-parent',)]), index.get_missing_parents())
    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
        self.assertEqual(
            frozenset([(b'parent-1',), (b'parent-2',)]),
            index.get_missing_parents())
    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines((b'a',), (), [b'lines\n'])
        if a_parent:
            b_parents = ((b'a',),)
        else:
            b_parents = ()
        source.add_lines((b'b',), b_parents, [b'lines\n'])
        return source
    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [(b'b',)], 'unordered', False))
    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []

        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertContainsRe(
            warnings[0],
            r"^inconsistent details in skipped record: \(b?'b',\)"
            r" \(b?'42 32 0 8', \(\(\),\)\)"
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")
    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
                              r" in add_records:"
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")
    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        # sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
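# The 'bzr.groupcompress.max_bytes_to_index' option exercised above bounds how
# much of each inserted text the delta index will sample: it overrides both
# vf._max_bytes_to_index and the compressor's _delta_index limit, and a
# non-integer value such as 'boogah' simply falls back to the default.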
class StubGCVF(object):

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(
                key, [key_to_text[key]], len(key_to_text[key]), None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
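    # make_block reduces compressor.labels_deltas to a plain mapping of
    # key -> (start, end) byte offsets within the finished block; that is the
    # shape the add_key_to_manager() calls in the tests below expect.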
    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key1',), (b'key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([(b'key2',), (b'key1',)], result_order)
    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[(b'key1',)]
        entry4 = locations[(b'key4',)]
        self.assertEqualDiff(b'key1\n'
                             b'\n'  # no parents
                             b'%d\n' # start offset
                             b'%d\n' # end offset
                             b'key4\n'
                             b'\n'
                             b'%d\n'
                             b'%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
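    # The 'groupcompress-block' wire form checked above is four
    # newline-terminated fields (storage kind, z_header_len, header_len,
    # block_len) followed by a zlib-compressed header naming each key with
    # its start/end offsets, and then the serialised block itself.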
    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, b'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager((b'key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual((b'key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []

        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if groupcompress.GroupCompressor is not groupcompress.PyrexGroupCompressor:
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
        self.add_key_to_manager((b'key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it does
        # show the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)
    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())

        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        self.add_key_to_manager((b'key3',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
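    # check_is_well_utilized() compares the bytes the requested keys actually
    # reference against _full_enough_block_size; as the comments above note,
    # once roughly 75% of the block is wanted it is treated as worth reusing
    # as-is instead of being rebuilt.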
class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",