137
134
self.apply_delta_to_source = self._gc_module.apply_delta_to_source
139
136
def test_make_delta_is_typesafe(self):
    """Check that make_delta only accepts byte strings.

    A call with two byte strings must succeed; a call where either
    argument is an arbitrary object or a text (unicode) string must raise
    TypeError.
    """
    # Bytes literals are used throughout so the "valid" arguments really
    # are binary data on Python 3 as well as on Python 2.
    self.make_delta(b'a string', b'another string')

    def _check_make_delta(string1, string2):
        self.assertRaises(TypeError, self.make_delta, string1, string2)

    # An invalid value in the second position, then in the first.
    _check_make_delta(b'a string', object())
    _check_make_delta(b'a string', u'not a string')
    _check_make_delta(object(), b'a string')
    _check_make_delta(u'not a string', b'a string')
150
147
def test_make_noop_delta(self):
    """Check the delta produced when source and target are the same text.

    For each of _text1, _text2 and _text3, the delta of the text against
    itself must equal a fixed expected byte string.
    """
    # Expected values are bytes literals so the comparison is against
    # binary data on Python 3 as well as on Python 2.
    ident_delta = self.make_delta(_text1, _text1)
    self.assertEqual(b'M\x90M', ident_delta)
    ident_delta = self.make_delta(_text2, _text2)
    self.assertEqual(b'N\x90N', ident_delta)
    ident_delta = self.make_delta(_text3, _text3)
    self.assertEqual(b'\x87\x01\x90\x87', ident_delta)
158
155
def assertDeltaIn(self, delta1, delta2, delta):
159
156
"""Make sure that the delta bytes match one of the expectations."""
160
157
# In general, the python delta matcher gives different results than the
161
158
# pyrex delta matcher. Both should be valid deltas, though.
162
159
if delta not in (delta1, delta2):
163
self.fail(b"Delta bytes:\n"
160
self.fail("Delta bytes:\n"
167
164
% (delta, delta1, delta2))
169
166
def test_make_delta(self):
170
167
delta = self.make_delta(_text1, _text2)
171
168
self.assertDeltaIn(
172
b'N\x90/\x1fdiffer from\nagainst other text\n',
173
b'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
169
'N\x90/\x1fdiffer from\nagainst other text\n',
170
'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
175
172
delta = self.make_delta(_text2, _text1)
176
173
self.assertDeltaIn(
177
b'M\x90/\x1ebe matched\nagainst other text\n',
178
b'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13',
174
'M\x90/\x1ebe matched\nagainst other text\n',
175
'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13',
180
177
delta = self.make_delta(_text3, _text1)
181
self.assertEqual(b'M\x90M', delta)
178
self.assertEqual('M\x90M', delta)
182
179
delta = self.make_delta(_text3, _text2)
183
180
self.assertDeltaIn(
184
b'N\x90/\x1fdiffer from\nagainst other text\n',
185
b'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
181
'N\x90/\x1fdiffer from\nagainst other text\n',
182
'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
188
185
def test_make_delta_with_large_copies(self):
191
188
big_text = _text3 * 1220
192
189
delta = self.make_delta(big_text, big_text)
193
190
self.assertDeltaIn(
194
b'\xdc\x86\x0a' # Encoding the length of the uncompressed text
195
b'\x80' # Copy 64kB, starting at byte 0
196
b'\x84\x01' # and another 64kB starting at 64kB
197
b'\xb4\x02\x5c\x83', # And the bit of tail.
191
'\xdc\x86\x0a' # Encoding the length of the uncompressed text
192
'\x80' # Copy 64kB, starting at byte 0
193
'\x84\x01' # and another 64kB starting at 64kB
194
'\xb4\x02\x5c\x83', # And the bit of tail.
198
195
None, # Both implementations should be identical
201
198
def test_apply_delta_is_typesafe(self):
    """Check that apply_delta only accepts byte strings.

    Applying a bytes delta to the bytes source _text1 must succeed; passing
    an arbitrary object or a text (unicode) string as either the source or
    the delta must raise TypeError.
    """
    self.apply_delta(_text1, b'M\x90M')
    # Invalid source argument.
    self.assertRaises(TypeError, self.apply_delta, object(), b'M\x90M')
    # .decode() yields a text string on both Python 2 and Python 3, unlike
    # the Python-2-only unicode() builtin.
    self.assertRaises(TypeError, self.apply_delta,
                      _text1.decode('latin1'), b'M\x90M')
    # Invalid delta argument.
    self.assertRaises(TypeError, self.apply_delta, _text1, u'M\x90M')
    self.assertRaises(TypeError, self.apply_delta, _text1, object())
209
206
def test_apply_delta(self):
    """Check that applying known deltas converts each text into the other.

    A fixed delta applied to _text1 must give _text2, and a second fixed
    delta applied to _text2 must give _text1.
    """
    # The deltas are bytes literals: apply_delta is exercised with binary
    # data only (see the type-safety test for the rejected types).
    target = self.apply_delta(
        _text1, b'N\x90/\x1fdiffer from\nagainst other text\n')
    self.assertEqual(_text2, target)
    target = self.apply_delta(
        _text2, b'M\x90/\x1ebe matched\nagainst other text\n')
    self.assertEqual(_text1, target)
217
214
def test_apply_delta_to_source_is_safe(self):
    """Check argument validation in apply_delta_to_source.

    A source that is not a byte string must raise TypeError, and the three
    offset pairs below, used with the 3-byte source b'foo', must each raise
    ValueError.
    """
    self.assertRaises(TypeError,
                      self.apply_delta_to_source, object(), 0, 1)
    self.assertRaises(TypeError,
                      self.apply_delta_to_source, u'unicode str', 0, 1)
    # Presumably rejected because the end offset (4) is past the 3 bytes.
    self.assertRaises(ValueError,
                      self.apply_delta_to_source, b'foo', 1, 4)
    # Presumably rejected because the start offset (5) is past the 3 bytes.
    self.assertRaises(ValueError,
                      self.apply_delta_to_source, b'foo', 5, 3)
    # Presumably rejected because the start offset (3) is after the end (2).
    self.assertRaises(ValueError,
                      self.apply_delta_to_source, b'foo', 3, 2)
232
229
def test_apply_delta_to_source(self):
    """Check applying a delta stored in the same buffer as its source.

    The buffer is _text1 immediately followed by a delta; the delta is
    located by passing len(_text1) as its start offset and the buffer
    length as its end offset. The result must equal _text2.
    """
    # The delta must be a bytes literal so it can be concatenated with the
    # byte string _text1 on Python 3.
    source_and_delta = (_text1
                        + b'N\x90/\x1fdiffer from\nagainst other text\n')
    result = self.apply_delta_to_source(
        source_and_delta, len(_text1), len(source_and_delta))
    self.assertEqual(_text2, result)
239
236
class TestMakeAndApplyCompatible(tests.TestCase):
241
238
scenarios = two_way_scenarios()
243
make_delta = None # Set by load_tests
244
apply_delta = None # Set by load_tests
240
make_delta = None # Set by load_tests
241
apply_delta = None # Set by load_tests
246
243
def assertMakeAndApply(self, source, target):
247
244
"""Assert that generating a delta and applying gives success."""
438
428
third_delta = di.make_delta(_third_text)
439
429
result = self._gc_module.apply_delta(source, third_delta)
440
430
self.assertEqualDiff(_third_text, result)
441
self.assertEqual(b'\x85\x01\x90\x14\x91\x7e\x1c'
442
b'\x91S&\x03and\x91\x18,', third_delta)
431
self.assertEqual('\x85\x01\x90\x14\x91\x7e\x1c'
432
'\x91S&\x03and\x91\x18,', third_delta)
443
433
# Now create a delta, which we know won't be able to be 'fit' into the
445
435
fourth_delta = di.make_delta(_fourth_text)
446
436
self.assertEqual(_fourth_text,
447
437
self._gc_module.apply_delta(source, fourth_delta))
448
self.assertEqual(b'\x80\x01'
449
b'\x7f123456789012345\nsame rabin hash\n'
450
b'123456789012345\nsame rabin hash\n'
451
b'123456789012345\nsame rabin hash\n'
452
b'123456789012345\nsame rabin hash'
453
b'\x01\n', fourth_delta)
438
self.assertEqual('\x80\x01'
439
'\x7f123456789012345\nsame rabin hash\n'
440
'123456789012345\nsame rabin hash\n'
441
'123456789012345\nsame rabin hash\n'
442
'123456789012345\nsame rabin hash'
443
'\x01\n', fourth_delta)
454
444
di.add_delta_source(fourth_delta, 0)
455
445
source += fourth_delta
456
446
# With the next delta, everything should be found
457
447
fifth_delta = di.make_delta(_fourth_text)
458
448
self.assertEqual(_fourth_text,
459
449
self._gc_module.apply_delta(source, fifth_delta))
460
self.assertEqual(b'\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
450
self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
463
453
class TestCopyInstruction(tests.TestCase):
465
455
def assertEncode(self, expected, offset, length):
    """Assert that encoding a copy instruction gives the expected bytes.

    :param expected: The byte string the encoder should return.
    :param offset: Offset passed to encode_copy_instruction.
    :param length: Length passed to encode_copy_instruction.
    """
    # The result is held in 'data' rather than a local called 'bytes', so
    # the builtin type is not shadowed, and is compared directly: iterating
    # a bytes object yields ints on Python 3, so an ord()-based hex dump of
    # the two values would itself raise there.
    data = _groupcompress_py.encode_copy_instruction(offset, length)
    self.assertEqual(expected, data)
469
def assertDecode(self, exp_offset, exp_length, exp_newpos, data, pos):
461
def assertDecode(self, exp_offset, exp_length, exp_newpos, bytes, pos):
462
cmd = ord(bytes[pos])
472
out = _groupcompress_py.decode_copy_instruction(data, cmd, pos)
464
out = _groupcompress_py.decode_copy_instruction(bytes, cmd, pos)
473
465
self.assertEqual((exp_offset, exp_length, exp_newpos), out)
475
467
def test_encode_no_length(self):
    """Check encodings of 64KiB copies at a range of offsets.

    Every case uses a length of 64 * 1024; the expected encodings contain
    only the command byte followed by the non-zero offset bytes.
    """
    self.assertEncode(b'\x80', 0, 64 * 1024)
    self.assertEncode(b'\x81\x01', 1, 64 * 1024)
    self.assertEncode(b'\x81\x0a', 10, 64 * 1024)
    self.assertEncode(b'\x81\xff', 255, 64 * 1024)
    self.assertEncode(b'\x82\x01', 256, 64 * 1024)
    self.assertEncode(b'\x83\x01\x01', 257, 64 * 1024)
    self.assertEncode(b'\x8F\xff\xff\xff\xff', 0xFFFFFFFF, 64 * 1024)
    # Offsets with a single zero byte: that byte is absent from the output.
    self.assertEncode(b'\x8E\xff\xff\xff', 0xFFFFFF00, 64 * 1024)
    self.assertEncode(b'\x8D\xff\xff\xff', 0xFFFF00FF, 64 * 1024)
    self.assertEncode(b'\x8B\xff\xff\xff', 0xFF00FFFF, 64 * 1024)
    self.assertEncode(b'\x87\xff\xff\xff', 0x00FFFFFF, 64 * 1024)
    self.assertEncode(b'\x8F\x04\x03\x02\x01', 0x01020304, 64 * 1024)
489
481
def test_encode_no_offset(self):
    """Check encodings of copies from offset 0 at a range of lengths."""
    self.assertEncode(b'\x90\x01', 0, 1)
    self.assertEncode(b'\x90\x0a', 0, 10)
    self.assertEncode(b'\x90\xff', 0, 255)
    self.assertEncode(b'\xA0\x01', 0, 256)
    self.assertEncode(b'\xB0\x01\x01', 0, 257)
    self.assertEncode(b'\xB0\xff\xff', 0, 0xFFFF)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertEncode(b'\x80', 0, 64 * 1024)
501
493
def test_encode(self):
    """Check encodings of copy instructions for mixed offsets and lengths."""
    self.assertEncode(b'\x91\x01\x01', 1, 1)
    self.assertEncode(b'\x91\x09\x0a', 9, 10)
    self.assertEncode(b'\x91\xfe\xff', 254, 255)
    self.assertEncode(b'\xA2\x02\x01', 512, 256)
    self.assertEncode(b'\xB3\x02\x01\x01\x01', 258, 257)
    self.assertEncode(b'\xB0\x01\x01', 0, 257)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertEncode(b'\x81\x0a', 10, 64 * 1024)
513
505
def test_decode_no_length(self):
    """Check decoding of instructions that carry no length bytes.

    Each case expects a decoded length of 65536 and a new position just
    past the command byte and its offset bytes.
    """
    # If length is 0, it is interpreted as 64KiB
    # The shortest possible instruction is a copy of 64KiB from offset 0
    self.assertDecode(0, 65536, 1, b'\x80', 0)
    self.assertDecode(1, 65536, 2, b'\x81\x01', 0)
    self.assertDecode(10, 65536, 2, b'\x81\x0a', 0)
    self.assertDecode(255, 65536, 2, b'\x81\xff', 0)
    self.assertDecode(256, 65536, 2, b'\x82\x01', 0)
    self.assertDecode(257, 65536, 3, b'\x83\x01\x01', 0)
    self.assertDecode(0xFFFFFFFF, 65536, 5, b'\x8F\xff\xff\xff\xff', 0)
    self.assertDecode(0xFFFFFF00, 65536, 4, b'\x8E\xff\xff\xff', 0)
    self.assertDecode(0xFFFF00FF, 65536, 4, b'\x8D\xff\xff\xff', 0)
    self.assertDecode(0xFF00FFFF, 65536, 4, b'\x8B\xff\xff\xff', 0)
    self.assertDecode(0x00FFFFFF, 65536, 4, b'\x87\xff\xff\xff', 0)
    self.assertDecode(0x01020304, 65536, 5, b'\x8F\x04\x03\x02\x01', 0)
529
521
def test_decode_no_offset(self):
    """Check decoding of instructions that carry no offset bytes.

    Each case expects a decoded offset of 0.
    """
    self.assertDecode(0, 1, 2, b'\x90\x01', 0)
    self.assertDecode(0, 10, 2, b'\x90\x0a', 0)
    self.assertDecode(0, 255, 2, b'\x90\xff', 0)
    self.assertDecode(0, 256, 2, b'\xA0\x01', 0)
    self.assertDecode(0, 257, 3, b'\xB0\x01\x01', 0)
    self.assertDecode(0, 65535, 3, b'\xB0\xff\xff', 0)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertDecode(0, 65536, 1, b'\x80', 0)
541
533
def test_decode(self):
    """Check decoding of instructions with mixed offsets and lengths.

    Each case gives the expected (offset, length, new position) for an
    instruction that starts at position 0 of the buffer.
    """
    self.assertDecode(1, 1, 3, b'\x91\x01\x01', 0)
    self.assertDecode(9, 10, 3, b'\x91\x09\x0a', 0)
    self.assertDecode(254, 255, 3, b'\x91\xfe\xff', 0)
    self.assertDecode(512, 256, 3, b'\xA2\x02\x01', 0)
    self.assertDecode(258, 257, 5, b'\xB3\x02\x01\x01\x01', 0)
    self.assertDecode(0, 257, 3, b'\xB0\x01\x01', 0)
549
541
def test_decode_not_start(self):
    """Check decoding of an instruction that is not at the buffer start.

    The instruction is surrounded by unrelated bytes and the starting
    position is non-zero; the expected new position is given relative to
    the whole buffer.
    """
    self.assertDecode(1, 1, 6, b'abc\x91\x01\x01def', 3)
    self.assertDecode(9, 10, 5, b'ab\x91\x09\x0ade', 2)
    self.assertDecode(254, 255, 6, b'not\x91\xfe\xffcopy', 3)
555
547
class TestBase128Int(tests.TestCase):
557
549
scenarios = module_scenarios()
559
_gc_module = None # Set by load_tests
551
_gc_module = None # Set by load_tests
561
553
def assertEqualEncode(self, bytes, val):
562
554
self.assertEqual(bytes, self._gc_module.encode_base128_int(val))