    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        text = b'strange\ncommon\n'
        sha1, start_point, end_point, _ = compressor.compress(
            ('label',), [text], len(text), None)
        self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
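        # b'f' marks a fulltext record and \x0f is the length of the content
        # (15 bytes) as a base-128 varint; a delta record would start with
        # b'd' instead.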
        expected_lines = b'f\x0fstrange\ncommon\n'
        self.assertEqual(expected_lines, b''.join(compressor.chunks))

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(
            ('empty',), [], 0, None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        text = b'some\nbytes\n'
        compressor.compress(('content',), [text], len(text), None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(
            ('empty2',), [], 0, None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
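        # A zero-length text contributes nothing to the stream: even after
        # real content has been added, an empty record still reports
        # start == end == 0 and kind 'fulltext'.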

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, _, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        # get the first out
        self.assertEqual(([b'strange\ncommon long line\n'
                           b'that needs a 16 byte match\n'], sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
                           b'different\n'], sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        text = b'some text\nfor the first entry\n'
        _, _, _, _ = compressor.compress(
            ('key1',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'some text\nfor the second entry\n'
        _, _, _, _ = compressor.compress(
            ('key2',), [text], len(text), None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
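        # pop_last() discards the most recently added record, so the chunk
        # stream is restored to exactly what it was before ('key2',) was
        # compressed.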

    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('newlabel',), chunks, sum(map(len, chunks)), None)
        chunks = [b'new\n',
                  b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('label3',), chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
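        # ratio() compares total input bytes to the bytes actually emitted
        # into the group, so ~1.9 means the three texts compressed to
        # roughly half their combined size.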

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n',  # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
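
    # The copy instructions asserted above use a git-style command byte: the
    # high bit set means "copy"; bits 0-3 select which offset bytes follow
    # and bits 4-5 which length bytes follow. A minimal sketch of decoding
    # one (an illustrative helper, not part of the compressor API):
    def _decode_copy_for_illustration(self, delta, pos):
        cmd = delta[pos]
        pos += 1
        offset = 0
        for i, bit in enumerate((0x01, 0x02, 0x04, 0x08)):
            if cmd & bit:
                offset |= delta[pos] << (8 * i)
                pos += 1
        length = 0
        for i, bit in enumerate((0x10, 0x20)):
            if cmd & bit:
                length |= delta[pos] << (8 * i)
                pos += 1
        # b'\x91\x0a\x2c': 0x91 = 0x80 | 0x01 | 0x10, so one offset byte
        # (0x0a) and one length byte (0x2c) follow.
        return offset, length, pos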

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            b'\x91\x09\x31'  # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_stats(self):
        compressor = self.compressor()
        chunks = [b'strange\n',
                  b'common very very long line\n',
                  b'plus more text\n']
        compressor.compress(
            ('label',), chunks, sum(map(len, chunks)), None)
        chunks = [b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('newlabel',), chunks, sum(map(len, chunks)), None)
        chunks = [b'new\n',
                  b'common very very long line\n',
                  b'plus more text\n',
                  b'different\n',
                  b'moredifferent\n']
        compressor.compress(
            ('label3',), chunks, sum(map(len, chunks)), None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
        sha1_2, start_point, end_point, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            b'\x91\x0a\x2c',  # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            b'\x0adifferent\n',  # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        text = b'strange\ncommon very very long line\nwith some extra text\n'
        sha1_1, _, _, _ = compressor.compress(
            ('label',), [text], len(text), None)
        text = b'different\nmoredifferent\nand then some more\n'
        sha1_2, _, _, _ = compressor.compress(
            ('newlabel',), [text], len(text), None)
        expected_lines = list(compressor.chunks)
        text = (b'new\ncommon very very long line\nwith some extra text\n'
                b'different\nmoredifferent\nand then some more\n')
        sha1_3, start_point, end_point, _ = compressor.compress(
            ('label3',), [text], len(text), None)
        self.assertEqual(sha_string(text), sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            b'\x91\x0a\x30'  # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            b'\x91\x3c\x2b'  # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(
                key, [key_to_text[key]], len(key_to_text[key]), None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()
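        # labels_deltas stores four values per key; only the start and end
        # byte offsets are needed to locate a record in the flushed block,
        # so the other two fields (apparently chunk indexes) are dropped.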

        content = b'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            b'gcb1z\n'  # group compress block v1 zlib
            b'%d\n'  # Length of compressed content
            b'%d\n'  # Length of uncompressed content
            b'%s'  # Compressed content
            ) % (len(z_content), len(content), z_content)
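        # Block layout: one magic line, the compressed and uncompressed
        # lengths as decimal ASCII lines, then the zlib payload itself.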
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)

        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'  # Length of compressed content
                           b'%d\n'  # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])

        data = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = (b'gcb1z\n'  # group compress block v1 zlib
                           b'%d\n'  # Length of compressed content
                           b'%d\n'  # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(data, expected_header)
        remaining_bytes = data[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
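        # A minimal sketch of the same layout, parsed directly from the
        # serialised bytes (uses only values the test already asserts):
        magic, z_len, c_len, z_payload = data.split(b'\n', 3)
        self.assertEqual(b'gcb1z', magic)
        self.assertEqual(gcb._z_content_length, int(z_len.decode('ascii')))
        self.assertEqual(len(content), int(c_len.decode('ascii')))
        self.assertEqual(raw_bytes, zlib.decompress(z_payload))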

        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [((b'parent', ), b'2 78 2 10', ([],)),
             ((b'tip', ), b'2 78 2 10',
              ([(b'parent', ), (b'missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines((b'a',), (), [b'lines\n'])
        vf.add_lines((b'b',), (), [b'lines\n'])
        vf.add_lines((b'c',), (), [b'lines\n'])
        vf.add_lines((b'd',), (), [b'lines\n'])
        keys = [record.key for record in vf.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)], 'as-requested', False))
        keys = [record.key for record in vf2.get_record_stream(
            [(b'a',), (b'b',), (b'c',), (b'd',)],
            'as-requested', False)]
        self.assertEqual([(b'a',), (b'b',), (b'c',), (b'd',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
            [(b'b',), (b'a',), (b'd',), (b'c',)],
            'as-requested', False)]
        self.assertEqual([(b'b',), (b'a',), (b'd',), (b'c',)], keys)

    def test_get_record_stream_max_bytes_to_index_default(self):

        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([(b'missing-parent',)]), index.get_missing_parents())

        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            ((b'new-key',), b'2 10 2 10', [((b'parent-1',), (b'parent-2',))])])
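        # The b'2 10 2 10' value packs four integers that locate the record
        # inside its group block; only the reference lists matter for the
        # parent tracking being tested here.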
        self.assertEqual(

        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [(b'b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []

        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning

        (b'key1',): b"this is a text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key2',): b"another text\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key3',): b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
        (b'key4',): b"this will be extracted\n"
                    b"but references most of its bytes from\n"
                    b"yet another text which won't be extracted\n"
                    b"with a reasonable amount of compressible bytes\n"
                    b"which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(
                key, [key_to_text[key]], len(key_to_text[key]), None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.items())
        block = compressor.flush()

        if not issubclass(groupcompress.GroupCompressor,
                          groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                                          ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
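        # The settings dict is handed through to the freshly built
        # compressor; max_bytes_to_index caps how much text the delta index
        # will index when computing deltas.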
        self.add_key_to_manager((b'key3',), locations, old_block, manager)

        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1])  # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3])  # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
                                           ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))