# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

from .lazy_import import lazy_import
# NOTE(review): the body of this lazy import was lost in extraction; the list
# below was rebuilt from the module names this file uses -- confirm against
# the upstream file before merging.
lazy_import(globals(), """
import os
import sys
import time
import difflib
""")


__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_bytes',
           'unified_diff_files']


# This is a version of unified_diff which only adds a factory parameter
# so that you can override the default SequenceMatcher
# this has been submitted as a patch to python
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  It defaults
    to difflib.SequenceMatcher.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)                 # doctest: +NORMALIZE_WHITESPACE
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # A date is separated from the file name by a tab; an empty date adds
    # nothing, so the header has no trailing tab.
    if fromfiledate:
        fromfiledate = '\t' + str(fromfiledate)
    if tofiledate:
        tofiledate = '\t' + str(tofiledate)

    # The ---/+++ header is emitted once, just before the first hunk, so
    # identical inputs produce no output at all.
    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            yield '--- %s%s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s%s%s' % (tofile, tofiledate, lineterm)
            started = True
        # The hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A 'replace' is reported as its removals followed by its
            # additions.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line


def unified_diff_bytes(a, b, fromfile=b'', tofile=b'', fromfiledate=b'',
                       tofiledate=b'', n=3, lineterm=b'\n', sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    This is the bytes counterpart of unified_diff: the input lines, the
    header fields and the generated lines are all bytes.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to b"" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using bytes for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  It defaults
    to difflib.SequenceMatcher.

    Example::

        unified_diff_bytes(b'one two three four'.split(),
                           b'zero one tree four'.split(),
                           b'Original', b'Current',
                           b'Sat Jan 26 23:30:50 1991',
                           b'Fri Jun 06 10:20:52 2003',
                           lineterm=b'')

    generates these lines (the file name and date are separated by a tab)::

        --- Original Sat Jan 26 23:30:50 1991
        +++ Current Fri Jun 06 10:20:52 2003
        @@ -1,4 +1,4 @@
        +zero
         one
        -two
        -three
        +tree
         four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # A date is separated from the file name by a tab; an empty date adds
    # nothing, so the header has no trailing tab.
    if fromfiledate:
        fromfiledate = b'\t' + bytes(fromfiledate)
    if tofiledate:
        tofiledate = b'\t' + bytes(tofiledate)

    # The ---/+++ header is emitted once, just before the first hunk, so
    # identical inputs produce no output at all.
    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            yield b'--- %s%s%s' % (fromfile, fromfiledate, lineterm)
            yield b'+++ %s%s%s' % (tofile, tofiledate, lineterm)
            started = True
        # The hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield b"@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield b' ' + line
                continue
            # A 'replace' is reported as its removals followed by its
            # additions.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield b'-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield b'+' + line


def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the "from" file, or '-' to read it from stdin.
    :param b: Path of the "to" file, or '-' to read it from stdin.
    :param sequencematcher: Matcher factory passed on to unified_diff_bytes.
    :return: An empty list when a and b are equal, otherwise the generator
        of bytes lines produced by unified_diff_bytes.
    """
    # Should this actually be an error?
    if a == b:
        return []

    def read_lines(path):
        # Return the content of path as a list of bytes lines.
        if path == '-':
            # On Python 3 sys.stdin is a text stream; its .buffer attribute
            # gives the bytes that unified_diff_bytes needs.
            return getattr(sys.stdin, 'buffer', sys.stdin).readlines()
        # Read everything up front so the file is closed before returning,
        # rather than leaked.
        with open(path, 'rb') as f:
            return f.readlines()

    def header_name(path):
        # unified_diff_bytes %-formats the names into bytes headers, which
        # on Python 3 only accepts bytes.
        if isinstance(path, bytes):
            return path
        return path.encode(sys.getfilesystemencoding() or 'utf-8', 'replace')

    lines_a = read_lines(a)
    lines_b = read_lines(b)

    # TODO: Include fromfiledate and tofiledate
    return unified_diff_bytes(lines_a, lines_b,
                              fromfile=header_name(a), tofile=header_name(b),
                              sequencematcher=sequencematcher)


# Prefer the compiled implementation; fall back to the pure-Python one,
# which is imported under the same names, when the extension is missing.
try:
    from ._patiencediff_c import (
        unique_lcs_c as unique_lcs,
        recurse_matches_c as recurse_matches,
        PatienceSequenceMatcher_c as PatienceSequenceMatcher
        )
except ImportError:
    from ._patiencediff_py import (
        unique_lcs_py as unique_lcs,
        recurse_matches_py as recurse_matches,
        PatienceSequenceMatcher_py as PatienceSequenceMatcher
        )


def main(args):
    """Command-line entry point: write the unified diff of two files to stdout.

    :param args: Command-line arguments, without the program name.
    :return: -1 when exactly two filenames were not supplied, otherwise None.
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    (opts, args) = p.parse_args(args)

    if len(args) != 2:
        print('You must supply 2 filenames to diff')
        return -1

    algorithms = {'patience': PatienceSequenceMatcher,
                  'difflib': difflib.SequenceMatcher}
    matcher = algorithms[opts.matcher]

    # unified_diff_files produces bytes lines; on Python 3 they must go to
    # the binary layer of stdout, since sys.stdout.write() only takes str.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        out.write(line)


# When run as a script, the value returned by main() becomes the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))