125
138
self.apply_delta_to_source = self._gc_module.apply_delta_to_source
127
140
def test_make_delta_is_typesafe(self):
    """make_delta takes two bytestrings and raises TypeError otherwise.

    Both an arbitrary object() and a unicode string are checked, in either
    argument position.
    """
    # Two bytestrings must be accepted without raising.
    self.make_delta(b'a string', b'another string')

    def _check_make_delta(string1, string2):
        # Helper: this argument pair must be rejected with TypeError.
        self.assertRaises(TypeError, self.make_delta, string1, string2)

    _check_make_delta(b'a string', object())
    _check_make_delta(b'a string', u'not a string')
    _check_make_delta(object(), b'a string')
    _check_make_delta(u'not a string', b'a string')
138
151
def test_make_noop_delta(self):
    """A delta of each sample text against itself has the expected bytes."""
    ident_delta = self.make_delta(_text1, _text1)
    self.assertEqual(b'M\x90M', ident_delta)
    ident_delta = self.make_delta(_text2, _text2)
    self.assertEqual(b'N\x90N', ident_delta)
    ident_delta = self.make_delta(_text3, _text3)
    self.assertEqual(b'\x87\x01\x90\x87', ident_delta)
146
159
def assertDeltaIn(self, delta1, delta2, delta):
147
160
"""Make sure that the delta bytes match one of the expectations."""
148
161
# In general, the python delta matcher gives different results than the
149
162
# pyrex delta matcher. Both should be valid deltas, though.
150
163
if delta not in (delta1, delta2):
151
self.fail("Delta bytes:\n"
164
self.fail(b"Delta bytes:\n"
155
168
% (delta, delta1, delta2))
157
170
def test_make_delta(self):
    """Deltas between the sample texts match one of the accepted encodings.

    Where two expectations are listed, either is accepted (see
    assertDeltaIn).
    """
    # NOTE(review): the final argument of each assertDeltaIn() call was not
    # visible in the reviewed view; ``delta`` is assumed from the helper's
    # (delta1, delta2, delta) signature -- confirm.
    delta = self.make_delta(_text1, _text2)
    self.assertDeltaIn(
        b'N\x90/\x1fdiffer from\nagainst other text\n',
        b'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
        delta)
    delta = self.make_delta(_text2, _text1)
    self.assertDeltaIn(
        b'M\x90/\x1ebe matched\nagainst other text\n',
        b'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13',
        delta)
    delta = self.make_delta(_text3, _text1)
    self.assertEqual(b'M\x90M', delta)
    delta = self.make_delta(_text3, _text2)
    self.assertDeltaIn(
        b'N\x90/\x1fdiffer from\nagainst other text\n',
        b'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
        delta)
176
189
def test_make_delta_with_large_copies(self):
258
273
di = self._gc_module.DeltaIndex('test text\n')
259
274
self.assertEqual('DeltaIndex(1, 10)', repr(di))
276
def test__dump_no_index(self):
    """_dump_index() gives None on a DeltaIndex that has no sources."""
    di = self._gc_module.DeltaIndex()
    self.assertEqual(None, di._dump_index())
280
def test__dump_index_simple(self):
    """Check the dumped hash and entry tables for a single source."""
    di = self._gc_module.DeltaIndex()
    di.add_source(_text1, 0)
    # Adding the first source alone leaves the index unbuilt ...
    self.assertFalse(di._has_index())
    self.assertEqual(None, di._dump_index())
    # ... and it is present once a delta has been requested.
    _ = di.make_delta(_text1)
    self.assertTrue(di._has_index())
    hash_list, entry_list = di._dump_index()
    self.assertEqual(16, len(hash_list))
    self.assertEqual(68, len(entry_list))
    # Drop the slots whose offset and hash are both zero.
    just_entries = [(idx, text_offset, hash_val)
                    for idx, (text_offset, hash_val)
                    in enumerate(entry_list)
                    if text_offset != 0 or hash_val != 0]
    rabin_hash = self._gc_module._rabin_hash
    # NOTE(review): the line closing this assertion was not visible in the
    # reviewed view; comparing against ``just_entries`` is assumed -- confirm.
    self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),
                      (25, 48, rabin_hash(_text1[33:49])),
                      (34, 32, rabin_hash(_text1[17:33])),
                      (47, 64, rabin_hash(_text1[49:65])),
                      ], just_entries)
    # This ensures that the hash map points to the location we expect it to
    for entry_idx, text_offset, hash_val in just_entries:
        self.assertEqual(entry_idx, hash_list[hash_val & 0xf])
304
def test__dump_index_two_sources(self):
    """Check the dumped hash and entry tables after adding two sources."""
    di = self._gc_module.DeltaIndex()
    di.add_source(_text1, 0)
    di.add_source(_text2, 2)
    # Base offset expected for the _text2 entries below: the length of
    # _text1 plus the 2 passed to the second add_source() call.
    start2 = len(_text1) + 2
    self.assertTrue(di._has_index())
    hash_list, entry_list = di._dump_index()
    self.assertEqual(16, len(hash_list))
    self.assertEqual(68, len(entry_list))
    # Drop the slots whose offset and hash are both zero.
    just_entries = [(idx, text_offset, hash_val)
                    for idx, (text_offset, hash_val)
                    in enumerate(entry_list)
                    if text_offset != 0 or hash_val != 0]
    rabin_hash = self._gc_module._rabin_hash
    # NOTE(review): the line closing this assertion was not visible in the
    # reviewed view; comparing against ``just_entries`` is assumed -- confirm.
    self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),
                      (9, start2+16, rabin_hash(_text2[1:17])),
                      (25, 48, rabin_hash(_text1[33:49])),
                      (30, start2+64, rabin_hash(_text2[49:65])),
                      (34, 32, rabin_hash(_text1[17:33])),
                      (35, start2+32, rabin_hash(_text2[17:33])),
                      (43, start2+48, rabin_hash(_text2[33:49])),
                      (47, 64, rabin_hash(_text1[49:65])),
                      ], just_entries)
    # Each entry should be in the appropriate hash bucket.
    for entry_idx, text_offset, hash_val in just_entries:
        hash_idx = hash_val & 0xf
        # NOTE(review): the call wrapping this range check was not visible
        # in the reviewed view; assertTrue is assumed -- confirm.
        self.assertTrue(
            hash_list[hash_idx] <= entry_idx < hash_list[hash_idx+1])
261
333
def test_first_add_source_doesnt_index_until_make_delta(self):
262
334
di = self._gc_module.DeltaIndex()
263
335
self.assertFalse(di._has_index())
364
457
class TestCopyInstruction(tests.TestCase):
366
459
def assertEncode(self, expected, offset, length):
    """Assert that encoding a copy of (offset, length) gives ``expected``.

    :param expected: The bytestring the encoder should produce.
    :param offset: Offset passed to encode_copy_instruction.
    :param length: Length passed to encode_copy_instruction.
    """
    # Named ``data`` rather than ``bytes`` so the builtin is not shadowed.
    data = _groupcompress_py.encode_copy_instruction(offset, length)
    self.assertEqual(expected, data)
372
def assertDecode(self, exp_offset, exp_length, exp_newpos, bytes, pos):
373
cmd = ord(bytes[pos])
463
def assertDecode(self, exp_offset, exp_length, exp_newpos, data, pos):
464
cmd = indexbytes(data, pos)
375
out = _groupcompress_py.decode_copy_instruction(bytes, cmd, pos)
466
out = _groupcompress_py.decode_copy_instruction(data, cmd, pos)
376
467
self.assertEqual((exp_offset, exp_length, exp_newpos), out)
378
469
def test_encode_no_length(self):
    """Copies of exactly 64KiB encode with offset bytes only."""
    self.assertEncode(b'\x80', 0, 64*1024)
    self.assertEncode(b'\x81\x01', 1, 64*1024)
    self.assertEncode(b'\x81\x0a', 10, 64*1024)
    self.assertEncode(b'\x81\xff', 255, 64*1024)
    self.assertEncode(b'\x82\x01', 256, 64*1024)
    self.assertEncode(b'\x83\x01\x01', 257, 64*1024)
    self.assertEncode(b'\x8F\xff\xff\xff\xff', 0xFFFFFFFF, 64*1024)
    self.assertEncode(b'\x8E\xff\xff\xff', 0xFFFFFF00, 64*1024)
    self.assertEncode(b'\x8D\xff\xff\xff', 0xFFFF00FF, 64*1024)
    self.assertEncode(b'\x8B\xff\xff\xff', 0xFF00FFFF, 64*1024)
    self.assertEncode(b'\x87\xff\xff\xff', 0x00FFFFFF, 64*1024)
    self.assertEncode(b'\x8F\x04\x03\x02\x01', 0x01020304, 64*1024)
392
483
def test_encode_no_offset(self):
    """Copies from offset 0 encode with length bytes only."""
    self.assertEncode(b'\x90\x01', 0, 1)
    self.assertEncode(b'\x90\x0a', 0, 10)
    self.assertEncode(b'\x90\xff', 0, 255)
    self.assertEncode(b'\xA0\x01', 0, 256)
    self.assertEncode(b'\xB0\x01\x01', 0, 257)
    self.assertEncode(b'\xB0\xff\xff', 0, 0xFFFF)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertEncode(b'\x80', 0, 64*1024)
404
495
def test_encode(self):
    """Copy instructions with various offsets and lengths encode as expected."""
    self.assertEncode(b'\x91\x01\x01', 1, 1)
    self.assertEncode(b'\x91\x09\x0a', 9, 10)
    self.assertEncode(b'\x91\xfe\xff', 254, 255)
    self.assertEncode(b'\xA2\x02\x01', 512, 256)
    self.assertEncode(b'\xB3\x02\x01\x01\x01', 258, 257)
    self.assertEncode(b'\xB0\x01\x01', 0, 257)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertEncode(b'\x81\x0a', 10, 64*1024)
416
507
def test_decode_no_length(self):
    """Instructions carrying no length bytes decode to a 64KiB copy."""
    # If length is 0, it is interpreted as 64KiB
    # The shortest possible instruction is a copy of 64KiB from offset 0
    self.assertDecode(0, 65536, 1, b'\x80', 0)
    self.assertDecode(1, 65536, 2, b'\x81\x01', 0)
    self.assertDecode(10, 65536, 2, b'\x81\x0a', 0)
    self.assertDecode(255, 65536, 2, b'\x81\xff', 0)
    self.assertDecode(256, 65536, 2, b'\x82\x01', 0)
    self.assertDecode(257, 65536, 3, b'\x83\x01\x01', 0)
    self.assertDecode(0xFFFFFFFF, 65536, 5, b'\x8F\xff\xff\xff\xff', 0)
    self.assertDecode(0xFFFFFF00, 65536, 4, b'\x8E\xff\xff\xff', 0)
    self.assertDecode(0xFFFF00FF, 65536, 4, b'\x8D\xff\xff\xff', 0)
    self.assertDecode(0xFF00FFFF, 65536, 4, b'\x8B\xff\xff\xff', 0)
    self.assertDecode(0x00FFFFFF, 65536, 4, b'\x87\xff\xff\xff', 0)
    self.assertDecode(0x01020304, 65536, 5, b'\x8F\x04\x03\x02\x01', 0)
432
523
def test_decode_no_offset(self):
    """Instructions carrying no offset bytes decode to a copy from offset 0."""
    self.assertDecode(0, 1, 2, b'\x90\x01', 0)
    self.assertDecode(0, 10, 2, b'\x90\x0a', 0)
    self.assertDecode(0, 255, 2, b'\x90\xff', 0)
    self.assertDecode(0, 256, 2, b'\xA0\x01', 0)
    self.assertDecode(0, 257, 3, b'\xB0\x01\x01', 0)
    self.assertDecode(0, 65535, 3, b'\xB0\xff\xff', 0)
    # Special case, if copy == 64KiB, then we store exactly 0
    # Note that this puns with a copy of exactly 0 bytes, but we don't care
    # about that, as we would never actually copy 0 bytes
    self.assertDecode(0, 65536, 1, b'\x80', 0)
444
535
def test_decode(self):
    """Copy instructions with various offsets and lengths decode as expected."""
    self.assertDecode(1, 1, 3, b'\x91\x01\x01', 0)
    self.assertDecode(9, 10, 3, b'\x91\x09\x0a', 0)
    self.assertDecode(254, 255, 3, b'\x91\xfe\xff', 0)
    self.assertDecode(512, 256, 3, b'\xA2\x02\x01', 0)
    self.assertDecode(258, 257, 5, b'\xB3\x02\x01\x01\x01', 0)
    self.assertDecode(0, 257, 3, b'\xB0\x01\x01', 0)
452
543
def test_decode_not_start(self):
    """Decoding works when the instruction is not at the start of the data."""
    self.assertDecode(1, 1, 6, b'abc\x91\x01\x01def', 3)
    self.assertDecode(9, 10, 5, b'ab\x91\x09\x0ade', 2)
    self.assertDecode(254, 255, 6, b'not\x91\xfe\xffcopy', 3)
458
549
class TestBase128Int(tests.TestCase):
551
scenarios = module_scenarios()
460
553
_gc_module = None # Set by load_tests
462
555
def assertEqualEncode(self, bytes, val):
467
560
self._gc_module.decode_base128_int(bytes))
469
562
def test_encode(self):
    """Integers encode to the expected base-128 bytestrings."""
    self.assertEqualEncode(b'\x01', 1)
    self.assertEqualEncode(b'\x02', 2)
    self.assertEqualEncode(b'\x7f', 127)
    self.assertEqualEncode(b'\x80\x01', 128)
    self.assertEqualEncode(b'\xff\x01', 255)
    self.assertEqualEncode(b'\x80\x02', 256)
    self.assertEqualEncode(b'\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
478
571
def test_decode(self):
    """Base-128 bytestrings decode to the expected value and byte count."""
    self.assertEqualDecode(1, 1, b'\x01')
    self.assertEqualDecode(2, 1, b'\x02')
    self.assertEqualDecode(127, 1, b'\x7f')
    self.assertEqualDecode(128, 2, b'\x80\x01')
    self.assertEqualDecode(255, 2, b'\xff\x01')
    self.assertEqualDecode(256, 2, b'\x80\x02')
    self.assertEqualDecode(0xFFFFFFFF, 5, b'\xff\xff\xff\xff\x0f')
487
580
def test_decode_with_trailing_bytes(self):
    """Bytes following the encoded integer do not change the decoded result."""
    self.assertEqualDecode(1, 1, b'\x01abcdef')
    self.assertEqualDecode(127, 1, b'\x7f\x01')
    self.assertEqualDecode(128, 2, b'\x80\x01abcdef')
    self.assertEqualDecode(255, 2, b'\xff\x01\xff')