from cStringIO import StringIO

from bzrlib.diff import internal_diff
from bzrlib.errors import BinaryFile
from bzrlib.patiencediff import (recurse_matches, PatienceSequenceMatcher,
                                 unique_lcs, unified_diff, unified_diff_files)
from bzrlib.tests import TestCase, TestCaseInTempDir
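
# Tests for bzrlib's internal_diff helper and the patience diff
# implementation (unique_lcs, recurse_matches, PatienceSequenceMatcher,
# unified_diff, unified_diff_files).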


def udiff_lines(old, new, allow_binary=False):
    output = StringIO()
    internal_diff('old', old, 'new', new, output, allow_binary)
    output.seek(0, 0)
    return output.readlines()


class TestDiff(TestCase):

    def test_add_nl(self):
        """diff generates a valid diff for patches that add a newline"""
        lines = udiff_lines(['boo'], ['boo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
        ## "expected no-nl, got %r" % lines[4]

    def test_add_nl_2(self):
        """diff generates a valid diff for patches that change last line and
        add a newline
        """
        lines = udiff_lines(['boo'], ['goo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
        ## "expected no-nl, got %r" % lines[4]

    def test_remove_nl(self):
        """diff generates a valid diff for patches that change last line and
        remove a newline
        """
        lines = udiff_lines(['boo\n'], ['boo'])
        self.check_patch(lines)
        self.assertEquals(lines[5], '\\ No newline at end of file\n')
        ## "expected no-nl, got %r" % lines[5]

    def check_patch(self, lines):
        self.assert_(len(lines) > 1)
        ## "Not enough lines for a file header for patch:\n%s" % "".join(lines)
        self.assert_(lines[0].startswith('---'))
        ## 'No orig line for patch:\n%s' % "".join(lines)
        self.assert_(lines[1].startswith('+++'))
        ## 'No mod line for patch:\n%s' % "".join(lines)
        self.assert_(len(lines) > 2)
        ## "No hunks for patch:\n%s" % "".join(lines)
        self.assert_(lines[2].startswith('@@'))
        ## "No hunk header for patch:\n%s" % "".join(lines)
        self.assert_('@@' in lines[2][2:])
        ## "Unterminated hunk header for patch:\n%s" % "".join(lines)

    def test_binary_lines(self):
        self.assertRaises(BinaryFile, udiff_lines, [1023 * 'a' + '\x00'], [])
        self.assertRaises(BinaryFile, udiff_lines, [], [1023 * 'a' + '\x00'])
        udiff_lines([1023 * 'a' + '\x00'], [], allow_binary=True)
        udiff_lines([], [1023 * 'a' + '\x00'], allow_binary=True)
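

# The tests below exercise the patience diff machinery directly.  unique_lcs
# only pairs up elements that occur exactly once in both sequences; the
# higher-level matchers build their matchings out from those unique anchors.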


class TestCDVDiffLib(TestCase):

    def test_unique_lcs(self):
        self.assertEquals(unique_lcs('', ''), [])
        self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
        self.assertEquals(unique_lcs('a', 'b'), [])
        self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
        self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
        self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
        self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
                                                         (3,3), (4,4)])
        self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])

    def test_recurse_matches(self):
        def test_one(a, b, matches):
            test_matches = []
            recurse_matches(a, b, len(a), len(b), test_matches, 10)
            self.assertEquals(test_matches, matches)

        test_one(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'],
                 [(0, 0), (2, 2), (4, 4)])
        test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
                 [(0, 0), (2, 1), (4, 2)])

        # recurse_matches doesn't match non-unique
        # lines surrounded by bogus text.
        # The update has been done in patiencediff.SequenceMatcher instead.

        # This is what it could be:
        #test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])

        # This is what it currently gives:
        test_one('aBccDe', 'abccde', [(0,0), (5,5)])
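
    # get_matching_blocks follows the difflib convention: each block is an
    # (i, j, n) triple meaning a[i:i+n] == b[j:j+n], with a final
    # (len(a), len(b), 0) sentinel appended.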

    def test_matching_blocks(self):
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            self.assertEquals((len(a), len(b), 0), blocks[-1])
            self.assertEquals(expected_blocks, blocks[:-1])

        # Some basic matching tests
        chk_blocks('', '', [])
        chk_blocks([], [], [])
        chk_blocks('abcd', 'abcd', [(0, 0, 4)])
        chk_blocks('abcd', 'abce', [(0, 0, 3)])
        chk_blocks('eabc', 'abce', [(1, 0, 3)])
        chk_blocks('eabce', 'abce', [(1, 0, 4)])
        chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
        chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
        chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])
        # This may check too much, but it checks to see that
        # a copied block stays attached to the previous section,
        # not the later one.
        # difflib would tend to grab the trailing longest match,
        # which would make the diff not look right.
        chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                   [(0, 0, 6), (6, 11, 10)])

        # make sure it supports passing in lists
        chk_blocks(['hello there\n',
                    'world\n',
                    'how are you today?\n'],
                   ['hello there\n',
                    'how are you today?\n'],
                   [(0, 0, 1), (2, 1, 1)])

        chk_blocks('aBccDe', 'abccde', [(0,0,1), (2,2,2), (5,5,1)])

        chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
                                              (5,5,2), (8,8,1)])

        chk_blocks('abbabbXd', 'cabbabxd', [(0,1,5), (7,7,1)])
        chk_blocks('abbabbbb', 'cabbabbc', [(0,1,6)])
        chk_blocks('bbbbbbbb', 'cbbbbbbc', [(0,1,6)])
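
    # get_opcodes follows the difflib.SequenceMatcher convention: each tuple
    # is (tag, i1, i2, j1, j2), describing how a[i1:i2] maps to b[j1:j2].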

    def test_opcodes(self):
        def chk_ops(a, b, expected_codes):
            s = PatienceSequenceMatcher(None, a, b)
            self.assertEquals(expected_codes, s.get_opcodes())

        chk_ops('abcd', 'abcd', [('equal', 0,4, 0,4)])
        chk_ops('abcd', 'abce', [('equal',   0,3, 0,3),
                                 ('replace', 3,4, 3,4)
                                ])
        chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
                                 ('equal',  1,4, 0,3),
                                 ('insert', 4,4, 3,4)
                                ])
        chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
                                  ('equal',  1,5, 0,4)
                                 ])
        chk_ops('abcde', 'abXde', [('equal',   0,2, 0,2),
                                   ('replace', 2,3, 2,3),
                                   ('equal',   3,5, 3,5)
                                  ])
        chk_ops('abcde', 'abXYZde', [('equal',   0,2, 0,2),
                                     ('replace', 2,3, 2,5),
                                     ('equal',   3,5, 5,7)
                                    ])
        chk_ops('abde', 'abXYZde', [('equal',  0,2, 0,2),
                                    ('insert', 2,2, 2,5),
                                    ('equal',  2,4, 5,7)
                                   ])
        chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                [('equal',  0,6,  0,6),
                 ('insert', 6,6,  6,11),
                 ('equal',  6,16, 11,21)
                ])

        chk_ops(['hello there\n'
                 , 'world\n'
                 , 'how are you today?\n'],
                ['hello there\n'
                 , 'how are you today?\n'],
                [('equal',  0,1, 0,1),
                 ('delete', 1,2, 1,1),
                 ('equal',  2,3, 1,2),
                ])
        chk_ops('aBccDe', 'abccde',
                [('equal',   0,1, 0,1),
                 ('replace', 1,2, 1,2),
                 ('equal',   2,4, 2,4),
                 ('replace', 4,5, 4,5),
                 ('equal',   5,6, 5,6),
                ])
        chk_ops('aBcdEcdFg', 'abcdecdfg',
                [('equal',   0,1, 0,1),
                 ('replace', 1,2, 1,2),
                 ('equal',   2,4, 2,4),
                 ('replace', 4,5, 4,5),
                 ('equal',   5,7, 5,7),
                 ('replace', 7,8, 7,8),
                 ('equal',   8,9, 8,9)
                ])

    def test_multiple_ranges(self):
        # There was an earlier bug where we used a bad set of ranges,
        # this triggers that specific bug, to make sure it doesn't regress
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            x = blocks.pop()
            self.assertEquals(x, (len(a), len(b), 0))
            self.assertEquals(expected_blocks, blocks)

        chk_blocks('abcdefghijklmnop'
                   , 'abcXghiYZQRSTUVWXYZijklmnop'
                   , [(0, 0, 3), (6, 4, 3), (9, 20, 7)])

        chk_blocks('ABCd efghIjk  L'
                   , 'AxyzBCn mo pqrstuvwI1 2  L'
                   , [(0,0,1), (1, 4, 2), (4, 7, 1), (9, 19, 1), (12, 23, 3)])

        # These are rot13 code snippets.
        chk_blocks('''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
        sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
        vs vf_dhvrg():
            ercbegre = nqq_ercbegre_ahyy
        ryfr:
            ercbegre = nqq_ercbegre_cevag
        fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True), '''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.

    --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
    nqq gurz.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
        vzcbeg omeyvo.nqq

        vs qel_eha:
            vs vf_dhvrg():
                # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
                npgvba = omeyvo.nqq.nqq_npgvba_ahyy
            ryfr:
                npgvba = omeyvo.nqq.nqq_npgvba_cevag
        ryvs vf_dhvrg():
            npgvba = omeyvo.nqq.nqq_npgvba_nqq
        ryfr:
            npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag

        omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True)
                   , [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
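
    # unified_diff produces difflib-style unified diff lines ('---'/'+++'
    # headers and '@@' hunks); the sequence matcher used to compute the hunks
    # is pluggable via the sequencematcher argument.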

    def test_patience_unified_diff(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -1,3 +1,2 @@\n',
                           ' hello there\n',
                           '-world\n',
                           ' how are you today?\n'
                          ]
                          , list(unified_diff(txt_a, txt_b
                                 , sequencematcher=PatienceSequenceMatcher)))

        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')

        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n'
                          ]
                          , list(unified_diff(txt_a, txt_b)))

        # And the patience diff
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n',
                          ]
                          , list(unified_diff(txt_a, txt_b,
                                 sequencematcher=PatienceSequenceMatcher)))
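

# unified_diff_files reads its two inputs from disk, so these tests run in a
# temporary directory (TestCaseInTempDir) and write the fixture files first.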


class TestCDVDiffLibFiles(TestCaseInTempDir):

    def test_patience_unified_diff_files(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        open('a1', 'wb').writelines(txt_a)
        open('b1', 'wb').writelines(txt_b)

        self.assertEquals(['--- a1 \n',
                           '+++ b1 \n',
                           '@@ -1,3 +1,2 @@\n',
                           ' hello there\n',
                           '-world\n',
                           ' how are you today?\n',
                          ]
                          , list(unified_diff_files('a1', 'b1',
                                 sequencematcher=PatienceSequenceMatcher)))

        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        open('a2', 'wb').writelines(txt_a)
        open('b2', 'wb').writelines(txt_b)

        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n'
                          ]
                          , list(unified_diff_files('a2', 'b2')))

        # And the patience diff
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n',
                          ]
                          , list(unified_diff_files('a2', 'b2',
                                 sequencematcher=PatienceSequenceMatcher)))