1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
from cStringIO import StringIO
import os
import subprocess
import sys
from tempfile import TemporaryFile

from bzrlib.diff import internal_diff, external_diff, show_diff_trees
from bzrlib.errors import BinaryFile, NoDiff
import bzrlib.patiencediff
from bzrlib.tests import (TestCase, TestCaseWithTransport,
                          TestCaseInTempDir, TestSkipped)
31
def udiff_lines(old, new, allow_binary=False):
    """Return the output of internal_diff for old vs new as a list of lines.

    :param old: lines of the 'old' text.
    :param new: lines of the 'new' text.
    :param allow_binary: passed straight through to internal_diff.
    :return: everything internal_diff wrote, split with readlines().
    """
    # internal_diff needs a file-like object to write into.
    output = StringIO()
    internal_diff('old', old, 'new', new, output, allow_binary)
    # Rewind first: after writing, the position is at the end of the buffer.
    output.seek(0, 0)
    return output.readlines()
38
def external_udiff_lines(old, new, use_stringio=False):
    """Return the output of external_diff (run with '-u') as a list of lines.

    :param old: lines of the 'old' text.
    :param new: lines of the 'new' text.
    :param use_stringio: when true, collect the output in a StringIO instead
        of a real temporary file.
    :return: the lines written by external_diff.
    :raises TestSkipped: when external_diff reports NoDiff.
    """
    if use_stringio:
        # StringIO has no fileno, so it tests a different codepath
        output = StringIO()
    else:
        output = TemporaryFile()
    # Nested try blocks (rather than try/except/finally) keep this valid on
    # older Python 2 releases.
    try:
        try:
            external_diff('old', old, 'new', new, output, diff_opts=['-u'])
        except NoDiff:
            raise TestSkipped('external "diff" not present to test')
        output.seek(0, 0)
        return output.readlines()
    finally:
        # Release the buffer/temporary file on every exit path.
        output.close()
54
class TestDiff(TestCase):
56
def test_add_nl(self):
    """A patch that only adds a trailing newline is a valid diff."""
    patch = udiff_lines(['boo'], ['boo\n'])
    self.check_patch(patch)
    no_newline_marker = '\\ No newline at end of file\n'
    # Index 4 is the second line after the hunk header (0-1 are the file
    # headers and 2 is the hunk header, as check_patch verifies).
    self.assertEquals(patch[4], no_newline_marker)
63
def test_add_nl_2(self):
    """diff generates a valid diff for patches that change last line and
    add a newline.
    """
    lines = udiff_lines(['boo'], ['goo\n'])
    self.check_patch(lines)
    # The old text has no trailing newline, so the marker is expected as the
    # second line after the hunk header.
    self.assertEquals(lines[4], '\\ No newline at end of file\n')
72
def test_remove_nl(self):
    """diff generates a valid diff for patches that remove the trailing
    newline of the last line.
    """
    lines = udiff_lines(['boo\n'], ['boo'])
    self.check_patch(lines)
    # Here it is the new text that lacks the newline, so the marker is
    # expected one line later than in the add-newline tests.
    self.assertEquals(lines[5], '\\ No newline at end of file\n')
81
def check_patch(self, lines):
    """Assert that lines start like a unified diff with at least one hunk.

    Checks, in order: two file header lines ('---' then '+++'), followed by
    a hunk header that both starts with '@@' and contains a second '@@'.

    :param lines: the diff output as a list of lines.
    """
    # Built once; every failure message shows the whole patch for context.
    patch_text = "".join(lines)
    self.assert_(len(lines) > 1,
        "Not enough lines for a file header for patch:\n%s" % patch_text)
    self.assert_(lines[0].startswith('---'),
        'No orig line for patch:\n%s' % patch_text)
    self.assert_(lines[1].startswith('+++'),
        'No mod line for patch:\n%s' % patch_text)
    self.assert_(len(lines) > 2,
        "No hunks for patch:\n%s" % patch_text)
    self.assert_(lines[2].startswith('@@'),
        "No hunk header for patch:\n%s" % patch_text)
    # Skip the leading '@@' so that only a closing '@@' can satisfy this.
    self.assert_('@@' in lines[2][2:],
        "Unterminated hunk header for patch:\n%s" % patch_text)
95
def test_binary_lines(self):
    """Text containing a NUL byte raises BinaryFile unless allow_binary."""
    nul_line = 1023 * 'a' + '\x00'
    # Without allow_binary, BinaryFile is expected whichever side holds
    # the NUL byte.
    self.assertRaises(BinaryFile, udiff_lines, [nul_line], [])
    self.assertRaises(BinaryFile, udiff_lines, [], [nul_line])
    # With allow_binary the same inputs only have to complete.
    udiff_lines([nul_line], [], allow_binary=True)
    udiff_lines([], [nul_line], allow_binary=True)
101
def test_external_diff(self):
    """External diff output is a valid patch ending with a bare newline."""
    patch = external_udiff_lines(['boo\n'], ['goo\n'])
    self.check_patch(patch)
    final_line = patch[-1]
    self.assertEqual('\n', final_line)
106
def test_external_diff_no_fileno(self):
    """External diff works when the output object has no fileno."""
    # Make sure that we can handle not having a fileno, even
    # if the diff is large
    # NOTE(review): the rest of this call was not readable in the reviewed
    # text. use_stringio=True is what selects the StringIO (no fileno) path
    # in external_udiff_lines; ['goo\n'] mirrors test_external_diff.
    # Confirm both against the original test.
    lines = external_udiff_lines(['boo\n']*10000,
                                 ['goo\n']*10000,
                                 use_stringio=True)
    self.check_patch(lines)
114
def test_external_diff_binary_lang_c(self):
    """With a C locale, external diff reports differing binary files."""
    # Remember the current locale variables so they can be put back.
    _old_env = {}
    langs = ('LANG', 'LC_ALL', 'LANGUAGE')
    for name in langs:
        _old_env[name] = os.environ.get(name)
    try:
        os.environ['LANG'] = 'C'
        os.environ['LC_ALL'] = 'C'
        if sys.platform == 'win32':
            # only LANGUAGE has effect on win32
            os.environ['LANGUAGE'] = 'C'
        lines = external_udiff_lines(['\x00foobar\n'], ['foo\x00bar\n'])
        # Older versions of diffutils say "Binary files", newer
        # versions just say "Files".
        self.assertContainsRe(lines[0],
                              '(Binary f|F)iles old and new differ\n')
        self.assertEquals(lines[1:], ['\n'])
    finally:
        # Restore even when an assertion above fails, so later tests do not
        # inherit the forced locale.
        for name in langs:
            value = _old_env[name]
            if value is None:
                # The variable was unset before; remove it if we set it.
                if os.environ.get(name) is not None:
                    del os.environ[name]
            else:
                os.environ[name] = value
140
def test_no_external_diff(self):
    """Check that NoDiff is raised when diff is not available"""
    # Use os.environ['PATH'] to make sure no 'diff' command is available
    orig_path = os.environ['PATH']
    try:
        os.environ['PATH'] = ''
        self.assertRaises(NoDiff, external_diff,
                          'old', ['boo\n'], 'new', ['goo\n'],
                          StringIO(), diff_opts=['-u'])
    finally:
        # Always put PATH back; a failed assertion must not leave the rest
        # of the run with an empty PATH.
        os.environ['PATH'] = orig_path
152
def test_internal_diff_default(self):
    """Without path_encoding, the file labels are written as UTF-8."""
    # Default internal diff encoding is utf8
    output = StringIO()
    internal_diff(u'old_\xb5', ['old_text\n'],
                  u'new_\xe5', ['new_text\n'], output)
    lines = output.getvalue().splitlines(True)
    self.check_patch(lines)
    # NOTE(review): only the two header entries of the expected list were
    # readable in the reviewed text, so only the header lines are compared.
    # Restore the full expected output from the original test if available.
    self.assertEquals(['--- old_\xc2\xb5\n',
                       '+++ new_\xc3\xa5\n',
                      ]
                      , lines[:2])
168
def test_internal_diff_utf8(self):
    """path_encoding='utf8' writes the file labels as UTF-8."""
    output = StringIO()
    internal_diff(u'old_\xb5', ['old_text\n'],
                  u'new_\xe5', ['new_text\n'], output,
                  path_encoding='utf8')
    lines = output.getvalue().splitlines(True)
    self.check_patch(lines)
    # NOTE(review): only the two header entries of the expected list were
    # readable in the reviewed text, so only the header lines are compared.
    # Restore the full expected output from the original test if available.
    self.assertEquals(['--- old_\xc2\xb5\n',
                       '+++ new_\xc3\xa5\n',
                      ]
                      , lines[:2])
184
def test_internal_diff_iso_8859_1(self):
    """path_encoding='iso-8859-1' writes the file labels as Latin-1."""
    output = StringIO()
    internal_diff(u'old_\xb5', ['old_text\n'],
                  u'new_\xe5', ['new_text\n'], output,
                  path_encoding='iso-8859-1')
    lines = output.getvalue().splitlines(True)
    self.check_patch(lines)
    # NOTE(review): only the '---' entry of the expected list was readable
    # in the reviewed text. The '+++' entry below is u'new_\xe5' encoded as
    # iso-8859-1, by analogy with the '---' entry; the remaining expected
    # lines are not compared. Restore them from the original test.
    self.assertEquals(['--- old_\xb5\n',
                       '+++ new_\xe5\n',
                      ]
                      , lines[:2])
200
def test_internal_diff_returns_bytes(self):
    """internal_diff must write byte strings, not unicode."""
    # The module-level name StringIO is the cStringIO factory function,
    # which has no StringIO attribute. Import the StringIO module locally
    # so that StringIO.StringIO resolves.
    import StringIO
    output = StringIO.StringIO()
    internal_diff(u'old_\xb5', ['old_text\n'],
                  u'new_\xe5', ['new_text\n'], output)
    self.failUnless(isinstance(output.getvalue(), str),
        'internal_diff should return bytestrings')
209
class TestDiffFiles(TestCaseInTempDir):
211
def test_external_diff_binary(self):
    """The output when using external diff should use diff's i18n error"""
    # Make sure external_diff doesn't fail in the current LANG
    lines = external_udiff_lines(['\x00foobar\n'], ['foo\x00bar\n'])

    cmd = ['diff', '-u', '--binary', 'old', 'new']
    # Write the same two texts to disk and close the files explicitly, so
    # the data is flushed before the external diff process reads it.
    for filename, content in (('old', '\x00foobar\n'),
                              ('new', 'foo\x00bar\n')):
        f = open(filename, 'wb')
        try:
            f.write(content)
        finally:
            f.close()
    pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stdin=subprocess.PIPE)
    out, err = pipe.communicate()
    # Diff returns '2' on Binary files.
    self.assertEqual(2, pipe.returncode)
    # We should output whatever diff tells us, plus a trailing newline
    self.assertEqual(out.splitlines(True) + ['\n'], lines)
228
class TestDiffDates(TestCaseWithTransport):
231
def setUp(self):
    """Create a tree with four dated revisions and a modified working file.

    NOTE(review): the 'def' line, the call that receives message= and
    timestamp=, and two arguments of each such call were not readable in
    the reviewed text. self.wt.commit(...), timezone=0 and the rev_id
    values are reconstructed from how the tests in this class use
    'rev-1'..'rev-4' and expect '+0000' dates. Confirm against the
    original.
    """
    super(TestDiffDates, self).setUp()
    self.wt = self.make_branch_and_tree('.')
    self.b = self.wt.branch
    self.build_tree_contents([
        ('file1', 'file1 contents at rev 1\n'),
        ('file2', 'file2 contents at rev 1\n')
        ])
    self.wt.add(['file1', 'file2'])
    self.wt.commit(
        message='Revision 1',
        timestamp=1143849600, # 2006-04-01 00:00:00 UTC
        timezone=0,
        rev_id='rev-1')
    self.build_tree_contents([('file1', 'file1 contents at rev 2\n')])
    self.wt.commit(
        message='Revision 2',
        timestamp=1143936000, # 2006-04-02 00:00:00 UTC
        timezone=0,
        rev_id='rev-2')
    self.build_tree_contents([('file2', 'file2 contents at rev 3\n')])
    self.wt.commit(
        message='Revision 3',
        timestamp=1144022400, # 2006-04-03 00:00:00 UTC
        timezone=0,
        rev_id='rev-3')
    self.wt.remove(['file2'])
    self.wt.commit(
        message='Revision 4',
        timestamp=1144108800, # 2006-04-04 00:00:00 UTC
        timezone=0,
        rev_id='rev-4')
    self.build_tree_contents([
        ('file1', 'file1 contents in working tree\n')
        ])
    # set the date stamps for files in the working tree to known values
    os.utime('file1', (1144195200, 1144195200)) # 2006-04-05 00:00:00 UTC
268
def get_diff(self, tree1, tree2, specific_files=None, working_tree=None):
    """Return the text show_diff_trees writes for tree1 vs tree2.

    :param tree1: the 'old' tree.
    :param tree2: the 'new' tree.
    :param specific_files: passed through to show_diff_trees.
    :param working_tree: if given, passed to show_diff_trees as the only
        entry of extra_trees.
    :return: the diff text, labelled with 'old/' and 'new/' prefixes.
    """
    output = StringIO()
    if working_tree is not None:
        extra_trees = (working_tree,)
    else:
        # Bind the name on this path too; an empty tuple keeps the type
        # consistent with the branch above.
        extra_trees = ()
    # NOTE(review): new_label was not readable in the reviewed text;
    # 'new/' matches the '+++ new/...' lines the tests below expect.
    show_diff_trees(tree1, tree2, output, specific_files=specific_files,
                    extra_trees=extra_trees, old_label='old/',
                    new_label='new/')
    return output.getvalue()
279
def test_diff_rev_tree_working_tree(self):
    """Diff of the basis tree against the working tree carries both dates."""
    output = self.get_diff(self.wt.basis_tree(), self.wt)
    # note that the date for old/file1 is from rev 2 rather than from
    # the basis revision (rev 4)
    # NOTE(review): the hunk header line and the end of the expected text
    # were not readable in the reviewed text; '@@ -1,1 +1,1 @@' and the
    # trailing blank line are reconstructed. Confirm against the original.
    self.assertEqualDiff(output, '''\
=== modified file 'file1'
--- old/file1\t2006-04-02 00:00:00 +0000
+++ new/file1\t2006-04-05 00:00:00 +0000
@@ -1,1 +1,1 @@
-file1 contents at rev 2
+file1 contents in working tree

''')
293
def test_diff_rev_tree_rev_tree(self):
    """Diff between two revision trees dates each side separately."""
    tree1 = self.b.repository.revision_tree('rev-2')
    tree2 = self.b.repository.revision_tree('rev-3')
    output = self.get_diff(tree1, tree2)
    # NOTE(review): the hunk header line and the end of the expected text
    # were not readable in the reviewed text; '@@ -1,1 +1,1 @@' and the
    # trailing blank line are reconstructed. Confirm against the original.
    self.assertEqualDiff(output, '''\
=== modified file 'file2'
--- old/file2\t2006-04-01 00:00:00 +0000
+++ new/file2\t2006-04-03 00:00:00 +0000
@@ -1,1 +1,1 @@
-file2 contents at rev 1
+file2 contents at rev 3

''')
307
def test_diff_add_files(self):
    """Added files show the epoch as the date of their old side."""
    tree1 = self.b.repository.revision_tree(None)
    tree2 = self.b.repository.revision_tree('rev-1')
    output = self.get_diff(tree1, tree2)
    # the files have the epoch time stamp for the tree in which
    # they don't exist (the old side shows 1970-01-01 below).
    # NOTE(review): the hunk header lines, the blank separator lines and
    # the end of the expected text were not readable in the reviewed text;
    # they are reconstructed here. Confirm against the original.
    self.assertEqualDiff(output, '''\
=== added file 'file1'
--- old/file1\t1970-01-01 00:00:00 +0000
+++ new/file1\t2006-04-01 00:00:00 +0000
@@ -0,0 +1,1 @@
+file1 contents at rev 1

=== added file 'file2'
--- old/file2\t1970-01-01 00:00:00 +0000
+++ new/file2\t2006-04-01 00:00:00 +0000
@@ -0,0 +1,1 @@
+file2 contents at rev 1

''')
328
def test_diff_remove_files(self):
    """A removed file shows the epoch as the date of its new side."""
    tree1 = self.b.repository.revision_tree('rev-3')
    tree2 = self.b.repository.revision_tree('rev-4')
    output = self.get_diff(tree1, tree2)
    # the file has the epoch time stamp for the tree in which
    # it doesn't exist (the new side shows 1970-01-01 below).
    # NOTE(review): the hunk header line and the end of the expected text
    # were not readable in the reviewed text; '@@ -1,1 +0,0 @@' and the
    # trailing blank line are reconstructed. Confirm against the original.
    self.assertEqualDiff(output, '''\
=== removed file 'file2'
--- old/file2\t2006-04-03 00:00:00 +0000
+++ new/file2\t1970-01-01 00:00:00 +0000
@@ -1,1 +0,0 @@
-file2 contents at rev 3

''')
343
def test_show_diff_specified(self):
    """A path from the working tree selects the file in older trees."""
    self.wt.rename_one('file1', 'file1b')
    repo = self.b.repository
    old_tree = repo.revision_tree('rev-1')
    new_tree = repo.revision_tree('rev-4')
    # 'file1b' exists only in the working tree; the diff of the two
    # revision trees is still expected to mention 'file1'.
    diff_text = self.get_diff(old_tree, new_tree,
                              specific_files=['file1b'],
                              working_tree=self.wt)
    self.assertContainsRe(diff_text, 'file1\t')
352
def test_recursive_diff(self):
    """Children of directories are matched"""
    # NOTE(review): the lines that create the directories were not
    # readable in the reviewed text. os.mkdir is used as the plain stdlib
    # way to make 'dir1' and 'dir2' exist before they are added; replace
    # it with the original helper call if that differs.
    os.mkdir('dir1')
    os.mkdir('dir2')
    self.wt.add(['dir1', 'dir2'])
    self.wt.rename_one('file1', 'dir1/file1')
    old_tree = self.b.repository.revision_tree('rev-1')
    new_tree = self.b.repository.revision_tree('rev-4')
    # Selecting the directory that now holds file1 must include it ...
    out = self.get_diff(old_tree, new_tree, specific_files=['dir1'],
                        working_tree=self.wt)
    self.assertContainsRe(out, 'file1\t')
    # ... and selecting the other directory must not.
    out = self.get_diff(old_tree, new_tree, specific_files=['dir2'],
                        working_tree=self.wt)
    self.assertNotContainsRe(out, 'file1\t')
368
class TestPatienceDiffLib(TestCase):
370
def test_unique_lcs(self):
    """unique_lcs returns (index_a, index_b) pairs for the common items."""
    unique_lcs = bzrlib.patiencediff.unique_lcs
    self.assertEquals(unique_lcs('', ''), [])
    self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
    self.assertEquals(unique_lcs('a', 'b'), [])
    self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
    self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
    self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
    # NOTE(review): the end of this expected list was not readable in the
    # reviewed text; (3,3), (4,4) follow from 'd' and 'e' sitting at the
    # same indexes on both sides. Confirm.
    self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
                                                     (3,3), (4,4)])
    self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])
382
def test_recurse_matches(self):
    """recurse_matches fills its result list with the expected pairs."""
    def test_one(a, b, matches):
        # recurse_matches reports through a list argument, so the list has
        # to exist before the call.
        test_matches = []
        # NOTE(review): the tail of this call was not readable in the
        # reviewed text. test_matches as the result list follows from the
        # assertion below; the final argument (assumed to be a recursion
        # limit of 10) is unverified. Confirm against the original and
        # against recurse_matches' signature.
        bzrlib.patiencediff.recurse_matches(a, b, 0, 0, len(a), len(b),
            test_matches, 10)
        self.assertEquals(test_matches, matches)

    test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'],
             [(0, 0), (2, 2), (4, 4)])
    test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
             [(0, 0), (2, 1), (4, 2)])

    # recurse_matches doesn't match non-unique
    # lines surrounded by bogus text.
    # The update has been done in patiencediff.SequenceMatcher instead

    # This is what it could be
    #test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])

    # This is what it currently gives:
    test_one('aBccDe', 'abccde', [(0,0), (5,5)])
404
def test_matching_blocks(self):
    """PatienceSequenceMatcher.get_matching_blocks on small inputs."""
    def chk_blocks(a, b, expected_blocks):
        # difflib always adds a signature of the total
        # length, with no matching entries at the end
        s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
        blocks = s.get_matching_blocks()
        self.assertEquals((len(a), len(b), 0), blocks[-1])
        self.assertEquals(expected_blocks, blocks[:-1])

    # Some basic matching tests
    chk_blocks('', '', [])
    chk_blocks([], [], [])
    chk_blocks('abcd', 'abcd', [(0, 0, 4)])
    chk_blocks('abcd', 'abce', [(0, 0, 3)])
    chk_blocks('eabc', 'abce', [(1, 0, 3)])
    chk_blocks('eabce', 'abce', [(1, 0, 4)])
    chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
    chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
    chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])
    # This may check too much, but it checks to see that
    # a copied block stays attached to the previous section,
    # not the later one.
    # difflib would tend to grab the trailing longest match
    # which would make the diff not look right
    chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
               [(0, 0, 6), (6, 11, 10)])

    # make sure it supports passing in lists
    # NOTE(review): the start of both lists was not readable in the
    # reviewed text. 'hello there\n' is the first line used by the unified
    # diff tests in this file; 'world\n' is a placeholder for the one line
    # present only in the first list. The expected blocks need exactly
    # this shape. Confirm the literal text.
    chk_blocks(
               ['hello there\n',
                'world\n',
                'how are you today?\n'],
               ['hello there\n',
                'how are you today?\n'],
               [(0, 0, 1), (2, 1, 1)])

    # non unique lines surrounded by non-matching lines
    # are not matched
    chk_blocks('aBccDe', 'abccde', [(0,0,1), (5,5,1)])

    # But they only need to be locally unique
    chk_blocks('aBcDec', 'abcdec', [(0,0,1), (2,2,1), (4,4,2)])

    # non unique blocks won't be matched
    chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (8,8,1)])

    # but locally unique ones will
    chk_blocks('aBcdEeXcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
                                          (5,4,1), (7,5,2), (10,8,1)])

    chk_blocks('abbabbXd', 'cabbabxd', [(7,7,1)])
    chk_blocks('abbabbbb', 'cabbabbc', [])
    chk_blocks('bbbbbbbb', 'cbbbbbbc', [])
458
def test_opcodes(self):
    """PatienceSequenceMatcher.get_opcodes on small inputs.

    NOTE(review): many expected tuples were not readable in the reviewed
    text. The missing ones are rebuilt from the matching blocks asserted
    for the same inputs in test_matching_blocks: each gap between
    consecutive blocks becomes a 'replace', 'delete' or 'insert', and each
    block an 'equal'. Confirm against the original.
    """
    def chk_ops(a, b, expected_codes):
        s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
        self.assertEquals(expected_codes, s.get_opcodes())

    chk_ops('abcd', 'abcd', [('equal',    0,4, 0,4)])
    chk_ops('abcd', 'abce', [('equal',   0,3, 0,3),
                             ('replace', 3,4, 3,4)
                            ])
    chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
                             ('equal',  1,4, 0,3),
                             ('insert', 4,4, 3,4)
                            ])
    chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
                              ('equal',  1,5, 0,4)
                             ])
    chk_ops('abcde', 'abXde', [('equal',   0,2, 0,2),
                               ('replace', 2,3, 2,3),
                               ('equal',   3,5, 3,5)
                              ])
    chk_ops('abcde', 'abXYZde', [('equal',   0,2, 0,2),
                                 ('replace', 2,3, 2,5),
                                 ('equal',   3,5, 5,7)
                                ])
    chk_ops('abde', 'abXYZde', [('equal',  0,2, 0,2),
                                ('insert', 2,2, 2,5),
                                ('equal',  2,4, 5,7)
                               ])
    chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
            [('equal',  0,6,  0,6),
             ('insert', 6,6,  6,11),
             ('equal',  6,16, 11,21)
            ])
    # NOTE(review): 'hello there\n' and 'world\n' were not readable here;
    # 'world\n' is a placeholder for the single line that is deleted.
    chk_ops(
            [ 'hello there\n'
            , 'world\n'
            , 'how are you today?\n'],
            [ 'hello there\n'
            , 'how are you today?\n'],
            [('equal',  0,1, 0,1),
             ('delete', 1,2, 1,1),
             ('equal',  2,3, 1,2),
            ])
    chk_ops('aBccDe', 'abccde',
            [('equal',   0,1, 0,1),
             ('replace', 1,5, 1,5),
             ('equal',   5,6, 5,6),
            ])
    chk_ops('aBcDec', 'abcdec',
            [('equal',   0,1, 0,1),
             ('replace', 1,2, 1,2),
             ('equal',   2,3, 2,3),
             ('replace', 3,4, 3,4),
             ('equal',   4,6, 4,6),
            ])
    chk_ops('aBcdEcdFg', 'abcdecdfg',
            [('equal',   0,1, 0,1),
             ('replace', 1,8, 1,8),
             ('equal',   8,9, 8,9)
            ])
    chk_ops('aBcdEeXcdFg', 'abcdecdfg',
            [('equal',   0,1, 0,1),
             ('replace', 1,2, 1,2),
             ('equal',   2,4, 2,4),
             ('delete', 4,5, 4,4),
             ('equal',   5,6, 4,5),
             ('delete', 6,7, 5,5),
             ('equal',   7,9, 5,7),
             ('replace', 9,10, 7,8),
             ('equal',   10,11, 8,9)
            ])
532
def test_multiple_ranges(self):
# Regression test: runs PatienceSequenceMatcher.get_matching_blocks on
# inputs that once exposed a bad set of ranges (see the comment below).
# NOTE(review): this block is incomplete in the reviewed text (statements
# and fixture lines are missing), so it is left untouched apart from these
# notes. Restore it from the original file before relying on it.
533
# There was an earlier bug where we used a bad set of ranges,
534
# this triggers that specific bug, to make sure it doesn't regress
535
def chk_blocks(a, b, expected_blocks):
536
# difflib always adds a signature of the total
537
# length, with no matching entries at the end
538
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
539
blocks = s.get_matching_blocks()
# NOTE(review): `x` is not assigned anywhere in the visible text.
# Presumably it is the final (len(a), len(b), 0) entry removed from
# `blocks` before the comparison below -- confirm.
541
self.assertEquals(x, (len(a), len(b), 0))
542
self.assertEquals(expected_blocks, blocks)
544
chk_blocks('abcdefghijklmnop'
545
, 'abcXghiYZQRSTUVWXYZijklmnop'
546
, [(0, 0, 3), (6, 4, 3), (9, 20, 7)])
# NOTE(review): the expected block (12, 23, 3) needs three items starting
# at index 12 of the first string, but that string as shown has only 14
# characters. A run of spaces inside both literals looks collapsed --
# confirm the exact spacing.
548
chk_blocks('ABCd efghIjk L'
549
, 'AxyzBCn mo pqrstuvwI1 2 L'
550
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
552
# These are rot13 code snippets.
# NOTE(review): the start of the call that takes the two rot13 texts is not
# visible, and lines of both texts are missing below. The expected blocks
# at the end cannot be checked against the text as shown.
554
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
556
gnxrf_netf = ['svyr*']
557
gnxrf_bcgvbaf = ['ab-erphefr']
559
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
560
sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
562
ercbegre = nqq_ercbegre_ahyy
564
ercbegre = nqq_ercbegre_cevag
565
fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)
568
pynff pzq_zxqve(Pbzznaq):
569
'''.splitlines(True), '''\
570
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
572
--qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
575
gnxrf_netf = ['svyr*']
576
gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']
578
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
583
# Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
584
npgvba = omeyvo.nqq.nqq_npgvba_ahyy
586
npgvba = omeyvo.nqq.nqq_npgvba_cevag
588
npgvba = omeyvo.nqq.nqq_npgvba_nqq
590
npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag
592
omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)
595
pynff pzq_zxqve(Pbzznaq):
597
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
599
def test_patience_unified_diff(self):
    """unified_diff output with the default and the patience matcher.

    NOTE(review): several input and expected lines were not readable in
    the reviewed text and are reconstructed:
    - 'world\n' is a placeholder for the single line removed from txt_a;
    - the '+++' header lines and the hunk bodies are rebuilt from the
      visible hunk headers and the two inputs;
    - the '---'/'+++' headers are written with two spaces (empty file
      name, separator, empty date), by analogy with the '--- a1 \n' form
      in test_patience_unified_diff_files. The reviewed text showed one
      space, but runs of spaces appear collapsed elsewhere in the file.
    Confirm all of this against the original.
    """
    txt_a = ['hello there\n',
             'world\n',
             'how are you today?\n']
    txt_b = ['hello there\n',
             'how are you today?\n']
    unified_diff = bzrlib.patiencediff.unified_diff
    psm = bzrlib.patiencediff.PatienceSequenceMatcher
    self.assertEquals([ '---  \n',
                        '+++  \n',
                        '@@ -1,3 +1,2 @@\n',
                        ' hello there\n',
                        '-world\n',
                        ' how are you today?\n'
                      ]
                      , list(unified_diff(txt_a, txt_b,
                             sequencematcher=psm)))
    txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
    txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
    # This is the result with LongestCommonSubstring matching
    self.assertEquals(['---  \n',
                       '+++  \n',
                       '@@ -1,6 +1,11 @@\n',
                       ' a\n',
                       ' b\n',
                       ' c\n',
                       '+d\n',
                       '+e\n',
                       '+f\n',
                       '+x\n',
                       '+y\n',
                       ' d\n',
                       ' e\n',
                       ' f\n']
                      , list(unified_diff(txt_a, txt_b)))
    # And the patience diff
    self.assertEquals(['---  \n',
                       '+++  \n',
                       '@@ -4,6 +4,11 @@\n',
                       ' d\n',
                       ' e\n',
                       ' f\n',
                       '+x\n',
                       '+y\n',
                       '+d\n',
                       '+e\n',
                       '+f\n',
                       ' g\n',
                       ' h\n',
                       ' i\n',
                      ]
                      , list(unified_diff(txt_a, txt_b,
                             sequencematcher=psm)))
654
class TestPatienceDiffLibFiles(TestCaseInTempDir):
656
def test_patience_unified_diff_files(self):
    """unified_diff_files output with the default and the patience matcher.

    NOTE(review): several input and expected lines were not readable in
    the reviewed text and are reconstructed: 'world\n' is a placeholder
    for the single line removed from txt_a, and the '+++' header lines and
    hunk bodies are rebuilt from the visible '---' headers, hunk headers
    and inputs. Confirm against the original.
    """
    def write_lines(filename, lines):
        # Close explicitly so the data is on disk before it is diffed.
        f = open(filename, 'wb')
        try:
            f.writelines(lines)
        finally:
            f.close()

    txt_a = ['hello there\n',
             'world\n',
             'how are you today?\n']
    txt_b = ['hello there\n',
             'how are you today?\n']
    write_lines('a1', txt_a)
    write_lines('b1', txt_b)

    unified_diff_files = bzrlib.patiencediff.unified_diff_files
    psm = bzrlib.patiencediff.PatienceSequenceMatcher
    self.assertEquals(['--- a1 \n',
                       '+++ b1 \n',
                       '@@ -1,3 +1,2 @@\n',
                       ' hello there\n',
                       '-world\n',
                       ' how are you today?\n',
                      ]
                      , list(unified_diff_files('a1', 'b1',
                             sequencematcher=psm)))

    txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
    txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
    write_lines('a2', txt_a)
    write_lines('b2', txt_b)

    # This is the result with LongestCommonSubstring matching
    self.assertEquals(['--- a2 \n',
                       '+++ b2 \n',
                       '@@ -1,6 +1,11 @@\n',
                       ' a\n',
                       ' b\n',
                       ' c\n',
                       '+d\n',
                       '+e\n',
                       '+f\n',
                       '+x\n',
                       '+y\n',
                       ' d\n',
                       ' e\n',
                       ' f\n']
                      , list(unified_diff_files('a2', 'b2')))

    # And the patience diff
    self.assertEquals(['--- a2 \n',
                       '+++ b2 \n',
                       '@@ -4,6 +4,11 @@\n',
                       ' d\n',
                       ' e\n',
                       ' f\n',
                       '+x\n',
                       '+y\n',
                       '+d\n',
                       '+e\n',
                       '+f\n',
                       ' g\n',
                       ' h\n',
                       ' i\n',
                      ]
                      , list(unified_diff_files('a2', 'b2',
                             sequencematcher=psm)))