        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual((b'strange\ncommon long line\n'
                          b'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            b'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            b'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
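        # In other words, pop_last() discards exactly the chunks appended by
        # the most recent compress() call, restoring the stream to its state
        # before that text was added.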

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)

            # source and target length

            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes

        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
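        # A note on the expected delta bytes above (a summary of the
        # git-style copy/insert encoding that groupcompress deltas use, read
        # off the bytes here rather than quoted from the source): a command
        # byte with the high bit set is a copy; its low four bits say which
        # offset bytes follow and bits 4-6 which length bytes follow.
        # 0x91 = 0b1001_0001 means one offset byte (0x0a) and one length byte
        # (0x2c), i.e. "copy 0x2c bytes starting at stream offset 0x0a".
        # A command byte with the high bit clear is an insert of that many
        # literal bytes, so 0x0a inserts the ten bytes of b'different\n'.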

        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)

            # Copy of first parent 'common' range
            b'\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes

        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
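        # The two copy commands show the third text being rebuilt from
        # ranges of *both* earlier texts in the group: a groupcompress delta
        # references the whole stream compressed so far, not just a single
        # parent text.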

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string(b'common long line\n'
                                    b'that needs a 16 byte match\n'
                                    b'different\n'), sha1_2)

            # copy the line common
            b'\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n', # insert 10 bytes

        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            b'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            b'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
                       b'different\nmoredifferent\nand then some more\n'),
            sha1_3)

            # Copy of first parent 'common' range
            b'\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes

        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
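        # Note that the expected copy commands here differ slightly from the
        # analogous test for the other compressor implementation above
        # (offset 0x0a/len 0x30 vs. offset 0x09/len 0x31): the two matchers
        # may choose different, equally valid match boundaries, and both
        # deltas reconstruct the same bytes.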

        content = (b'a tiny bit of content\n')
        z_content = zlib.compress(content)

            b'gcb1z\n' # group compress block v1 zlib
            b'%d\n' # Length of compressed content
            b'%d\n' # Length of uncompressed content
            b'%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
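        # The wire format exercised here: a b'gcb1z' magic line (the
        # trailing 'z' marking zlib compression), the compressed and
        # uncompressed lengths as decimal ASCII on their own lines, then the
        # compressed payload itself.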

        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed
        # size, and there is no compelling reason to split it up.
        self.assertEqual(expected_header, block_chunks[0])

        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n' # group compress block v1 zlib
                           b'%d\n' # Length of compressed content
                           b'%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
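        # i.e. to_bytes() must round-trip: everything after the fixed header
        # is a single zlib stream that decompresses back to the block's
        # uncompressed content.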

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [((b'parent', ), b'2 78 2 10', ([],)),
             ((b'tip', ), b'2 78 2 10',
              ([(b'parent', ), (b'missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):

        vf.add_lines((b'd',), (), [b'lines\n'])

        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)
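        # 'as-requested' is an ordering contract: the stream must yield
        # records in exactly the key order the caller passed in, here both
        # before and after the records were repacked into a second VF.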

    def test_get_record_stream_max_bytes_to_index_default(self):

        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([(b'missing-parent',)]), index.get_missing_parents())
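        # With track_external_parent_refs enabled, scanning an unvalidated
        # index records every parent reference it sees; get_missing_parents()
        # then reports the referenced keys that have no entry of their own,
        # here the b'missing-parent' referenced by b'tip'.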

        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
        self.assertEqual(

        target = self.make_test_vf(True, dir='target',
            inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [(b'b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []

        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
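        # The test monkey-patches trace.warning to capture emitted warnings;
        # the original is saved in _trace_warning so it can be restored once
        # the insertions have run.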

        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()

        if not issubclass(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                                          ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager((b'key3',), locations, old_block, manager)
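        # max_bytes_to_index caps how much of each source text the compiled
        # delta index will ingest; the test pins it to 32 bytes through the
        # compressor-settings callback and verifies the freshly made group
        # compressor picked that value up.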

        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details
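        # In effect _GCBuildDetails masquerades as the historical 4-tuple
        # (index_memo, compression_parent, parents, record_details) while
        # storing only the two fields that actually vary for groupcompress:
        # the index memo and the parents.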

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))