        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual((b'strange\ncommon long line\n'
                          b'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
                          b'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            b'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            b'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
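        # pop_last() presumably exists so a caller can retract the most
        # recent compress() call, e.g. when the text just added pushed the
        # group past a size threshold; all this test relies on is chunks
        # reverting to the value captured before the second compress().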

# ...

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            b'strange\n'
                            b'common very very long line\n'
                            b'plus more text\n', None)
        compressor.compress(('newlabel',),
                            b'common very very long line\n'
                            b'plus more text\n'
                            b'different\n'
                            b'moredifferent\n', None)
        compressor.compress(('label3',),
                            b'new\n'
                            b'common very very long line\n'
                            b'plus more text\n'
                            b'different\n'
                            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
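        # ratio() is presumably total bytes fed in over bytes of compressed
        # output; the three heavily overlapping texts above are expected to
        # compress to roughly half their combined size, hence about 1.9.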

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
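        # A sketch of the encoding the expected bytes above rely on (the
        # instruction format mirrors the git-style copy/insert scheme that
        # groupcompress uses; the walkthrough is inferred from the test
        # values themselves):
        #   b'd\x0f'        'd' marks a delta record; 0x0f (15) is the
        #                   length of the delta body that follows.
        #   b'\x36'         length of the target text: 0x36 == 54 == 17 + 27
        #                   + 10 for the three lines of 'newlabel'.
        #   b'\x91\x0a\x2c' a copy: the high bit of 0x91 means "copy"; its
        #                   low bits say one offset byte and one length byte
        #                   follow, so copy 0x2c == 44 bytes from stream
        #                   offset 0x0a == 10.
        #   b'\x0adifferent\n'  bytes below 0x80 are inserts: emit the next
        #                   0x0a == 10 literal bytes.
        #   44 copied + 10 inserted == 54, matching the target length.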

    def test_three_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0b',
            # source and target length
            b'\x5f'
            # insert new
            b'\x03new',
            # Copy of first parent 'common' range
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
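        # Note that the two copy instructions above pull from different
        # source texts: offsets address the compressor's accumulated output
        # stream, so a new text can reuse ranges from any text already in
        # the group (0x09 falls inside the 'label' record, 0x3c inside
        # 'newlabel'), not just from a single direct parent.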

# ...

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            b'strange\n'
                            b'common very very long line\n'
                            b'plus more text\n', None)
        compressor.compress(('newlabel',),
                            b'common very very long line\n'
                            b'plus more text\n'
                            b'different\n'
                            b'moredifferent\n', None)
        compressor.compress(('label3',),
                            b'new\n'
                            b'common very very long line\n'
                            b'plus more text\n'
                            b'different\n'
                            b'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0f',
            # source and target length
            b'\x36',
            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            b'd\x0c',
            # source and target length
            b'\x5f'
            # insert new
            b'\x04new\n',
            # Copy of first parent 'common' range
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

# ...

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, b'')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            b'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content()
        self.assertEqual(b'', block._content)
        self.assertEqual(b'', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
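        # A sketch of the minimal input assumed above: b'gcb1z\n0\n0\n' is
        # just the format marker plus zero-length compressed and
        # uncompressed fields, so both _content and _z_content come back
        # as b''.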

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          b'this is not a valid header')

    def test_from_bytes(self):
        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
        data = (
            b'gcb1z\n' # group compress block v1 zlib
            b'%d\n' # Length of compressed content
            b'%d\n' # Length of uncompressed content
            b'%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            data)
        # ...
        self.assertEqual(content, raw_bytes)
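        # The serialised layout exercised above, using only values from
        # this test: marker line, two ASCII length lines, zlib payload.
        #
        #   data = b'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content),
        #                                  z_content)
        #
        # from_bytes() can therefore read the two length lines and then
        # zlib-decompress exactly len(z_content) bytes to recover content.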

    def test_to_bytes(self):
        content = (b'this is some content\n'
                   b'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content([b'this is some content\n'
                                 b'this content will be compressed\n'],
                                len(content))
        old_data = data
        data = gcb.to_bytes()
        self.assertEqual(old_data, data)
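        # set_content() and set_chunked_content() must serialise
        # identically; the chunked variant presumably just lets large texts
        # be supplied as a list of pieces without first joining them into
        # one string.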

    def test_partial_decomp(self):
        content_chunks = []
        # ...

# ...

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
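        # Both permutations confirm that 'as-requested' preserves the exact
        # key order of the query, even after the records were repacked into
        # a second versionedfile in a different physical order.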

    def test_get_record_stream_max_bytes_to_index_default(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.writer.end()
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
                         record._manager._get_compressor_settings())

    def test_get_record_stream_accesses_compressor_settings(self):
        vf = self.make_test_vf(True, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.writer.end()
        vf._max_bytes_to_index = 1234
        record = next(vf.get_record_stream([(b'a',)], 'unordered', True))
        self.assertEqual(dict(max_bytes_to_index=1234),
                         record._manager._get_compressor_settings())

    def grouped_stream(self, revision_ids, first_parents=()):
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                b'some content that is\n'
                b'identical except for\n'
                b'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)
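
    # The generated texts differ only in their revision_id line, so they
    # delta against each other extremely well and a stream of them packs
    # naturally into shared blocks; first_parents lets a second group chain
    # onto the last key of the first (e.g. e-h continuing from (b'd',)).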

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        block_bytes = {}
        stream = vf.get_record_stream(
            [(r.encode(),) for r in 'abcdefgh'], 'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [(b'a',), (b'e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'e',)])
        for r in 'efgh':
            key = (r.encode(),)
            self.assertIs(block_bytes[key], block_bytes[(b'e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[(b'a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream(keys, 'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            # ...

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        keys = [(r.encode(),) for r in 'abcdefgh']
        vf2.insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False))
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        # ...

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self.grouped_stream([b'a', b'b', b'c', b'd']))
        # Second group, e-h
        vf.insert_record_stream(self.grouped_stream(
            [b'e', b'f', b'g', b'h'], first_parents=((b'd',),)))
        vf.writer.end()
        keys = [(r.encode(),) for r in 'abcdefgh']
        self.assertEqual(8, len(list(
            vf.get_record_stream(keys, 'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            keys, 'groupcompress', False),
            reuse_blocks=False))

        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream(keys, 'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                # ...

# ...

        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertContainsRe(
            warnings[0],
            r"^inconsistent details in skipped record: \(b?'b',\)"
            r" \(b?'42 32 0 8', \(\(\),\)\)"
            r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)$")

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(knit.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
                              r" in add_records:"
                              r" \(b?'b',\) \(b?'42 32 0 8', \(\(\),\)\)"
                              r" \(b?'74 32 0 8', \(\(\(b?'a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([(b'a',), (b'b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)

# ...


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        # ...
def test_get_fulltexts(self):
984
964
locations, block = self.make_block(self._texts)
985
965
manager = groupcompress._LazyGroupContentManager(block)
986
self.add_key_to_manager(('key1',), locations, block, manager)
987
self.add_key_to_manager(('key2',), locations, block, manager)
966
self.add_key_to_manager((b'key1',), locations, block, manager)
967
self.add_key_to_manager((b'key2',), locations, block, manager)
988
968
result_order = []
989
969
for record in manager.get_record_stream():
990
970
result_order.append(record.key)
991
971
text = self._texts[record.key]
992
972
self.assertEqual(text, record.get_bytes_as('fulltext'))
993
self.assertEqual([('key1',), ('key2',)], result_order)
973
self.assertEqual([(b'key1',), (b'key2',)], result_order)
995
975
# If we build the manager in the opposite order, we should get them
996
976
# back in the opposite order
997
977
manager = groupcompress._LazyGroupContentManager(block)
998
self.add_key_to_manager(('key2',), locations, block, manager)
999
self.add_key_to_manager(('key1',), locations, block, manager)
978
self.add_key_to_manager((b'key2',), locations, block, manager)
979
self.add_key_to_manager((b'key1',), locations, block, manager)
1000
980
result_order = []
1001
981
for record in manager.get_record_stream():
1002
982
result_order.append(record.key)
1003
983
text = self._texts[record.key]
1004
984
self.assertEqual(text, record.get_bytes_as('fulltext'))
1005
self.assertEqual([('key2',), ('key1',)], result_order)
985
self.assertEqual([(b'key2',), (b'key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        # ...

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager((b'key1',), locations, block, manager)
        self.add_key_to_manager((b'key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split(b'\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual(b'groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[(b'key1',)]
        entry4 = locations[(b'key4',)]
        self.assertEqualDiff(b'key1\n'
                             b'\n'  # no parents
                             b'%d\n' # start offset
                             b'%d\n' # end offset
                             b'key4\n'
                             b'\n'
                             b'%d\n'
                             b'%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
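                             # ... (assertion body continues)

        # The wire layout checked above, as implied by the split(b'\n', 4):
        # one b'groupcompress-block\n' marker line, three ASCII length lines
        # (compressed header, uncompressed header, block), then the
        # zlib-compressed header (key, parents, start and end offsets per
        # entry) followed by the raw block bytes.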