bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2781.1.1
by Martin Pool
 merge cpatiencediff from Lukas  | 
1  | 
#!/usr/bin/env python
 | 
2  | 
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
 | 
|
3  | 
#
 | 
|
4  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
5  | 
# it under the terms of the GNU General Public License as published by
 | 
|
6  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
7  | 
# (at your option) any later version.
 | 
|
8  | 
#
 | 
|
9  | 
# This program is distributed in the hope that it will be useful,
 | 
|
10  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
11  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
12  | 
# GNU General Public License for more details.
 | 
|
13  | 
#
 | 
|
14  | 
# You should have received a copy of the GNU General Public License
 | 
|
15  | 
# along with this program; if not, write to the Free Software
 | 
|
16  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
17  | 
||
18  | 
||
19  | 
from bzrlib.lazy_import import lazy_import  | 
|
20  | 
lazy_import(globals(), """  | 
|
21  | 
import os
 | 
|
22  | 
import sys
 | 
|
23  | 
import time
 | 
|
24  | 
import difflib
 | 
|
25  | 
""")  | 
|
26  | 
||
27  | 
||
28  | 
__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']  | 
|
29  | 
||
30  | 
||
31  | 
# This is a version of unified_diff which only adds a factory parameter
 | 
|
32  | 
# so that you can override the default SequenceMatcher
 | 
|
33  | 
# this has been submitted as a patch to python
 | 
|
34  | 
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Yield the unified-diff lines describing how to turn 'a' into 'b'.

    'a' and 'b' are sequences of lines.  The output groups the changes
    into hunks, each surrounded by up to 'n' lines of unchanged context
    (three by default).

    The control lines (the '---' and '+++' file headers and the '@@' hunk
    headers) are terminated with 'lineterm', which defaults to a newline.
    That suits inputs obtained from file.readlines(): both the input lines
    and the generated control lines then end in a newline, so the result
    can be passed straight to file.writelines().  If the input lines carry
    no trailing newline, pass lineterm="" to get uniformly newline-free
    output.

    'fromfile', 'tofile', 'fromfiledate' and 'tofiledate' are strings that
    are interpolated into the two file header lines.  The dates are
    normally in the format returned by time.ctime().  The file headers are
    only emitted if there is at least one hunk, so identical inputs produce
    no output at all.

    'sequencematcher' is a factory that is called as
    sequencematcher(None, a, b) and must return an object providing
    get_grouped_opcodes(n).  When it is None, difflib.SequenceMatcher is
    used.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print line
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        import difflib
        sequencematcher = difflib.SequenceMatcher

    matcher = sequencematcher(None, a, b)
    header_pending = True
    for hunk in matcher.get_grouped_opcodes(n):
        if header_pending:
            # The file header is written lazily, just before the first hunk.
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            header_pending = False

        # A hunk spans from the start of its first opcode to the end of its
        # last one, on both sides.
        first_op = hunk[0]
        last_op = hunk[-1]
        a_start, a_end = first_op[1], last_op[2]
        b_start, b_end = first_op[3], last_op[4]
        yield "@@ -%d,%d +%d,%d @@%s" % (
            a_start + 1, a_end - a_start,
            b_start + 1, b_end - b_start,
            lineterm)

        for tag, a_lo, a_hi, b_lo, b_hi in hunk:
            if tag == 'equal':
                for context_line in a[a_lo:a_hi]:
                    yield ' ' + context_line
            else:
                # A 'replace' is rendered as its removals followed by its
                # additions.
                if tag in ('replace', 'delete'):
                    for removed_line in a[a_lo:a_hi]:
                        yield '-' + removed_line
                if tag in ('replace', 'insert'):
                    for added_line in b[b_lo:b_hi]:
                        yield '+' + added_line
|
98  | 
||
99  | 
||
100  | 
def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the "from" file, or '-' to read it from sys.stdin.
    :param b: Path of the "to" file, or '-' to read it from sys.stdin.
    :param sequencematcher: Matcher factory handed on unchanged to
        unified_diff(); None selects unified_diff()'s default.
    :return: An empty list when 'a' and 'b' are the same name, otherwise
        the iterable of diff lines returned by unified_diff(), using 'a'
        and 'b' as the file names in the header.
    :raises IOError: (or OSError) if one of the named files cannot be
        opened.

    Both inputs are read completely before this function returns, and any
    file opened here is closed again (sys.stdin is left open), so no file
    handle is kept alive while the caller consumes the diff.
    """
    # Should this actually be an error?
    if a == b:
        return []

    # Only the handles we open ourselves are recorded here; sys.stdin is
    # never closed.
    opened = []
    try:
        # Open both inputs before reading either one, so that a bad path
        # is reported before any data is consumed from stdin.
        if a == '-':
            file_a = sys.stdin
        else:
            file_a = open(a, 'rb')
            opened.append(file_a)

        if b == '-':
            file_b = sys.stdin
        else:
            file_b = open(b, 'rb')
            opened.append(file_b)

        lines_a = file_a.readlines()
        lines_b = file_b.readlines()
    finally:
        for handle in opened:
            handle.close()

    # TODO: Include fromfiledate and tofiledate (this needs the
    # modification time of each named file, or the current time for stdin).
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)
|
124  | 
||
125  | 
||
126  | 
try:  | 
|
127  | 
from bzrlib._patiencediff_c import (  | 
|
128  | 
unique_lcs_c as unique_lcs,  | 
|
129  | 
recurse_matches_c as recurse_matches,  | 
|
130  | 
PatienceSequenceMatcher_c as PatienceSequenceMatcher  | 
|
131  | 
        )
 | 
|
132  | 
except ImportError:  | 
|
133  | 
from bzrlib._patiencediff_py import (  | 
|
134  | 
unique_lcs_py as unique_lcs,  | 
|
135  | 
recurse_matches_py as recurse_matches,  | 
|
136  | 
PatienceSequenceMatcher_py as PatienceSequenceMatcher  | 
|
137  | 
        )
 | 
|
138  | 
||
139  | 
||
140  | 
def main(args):
    """Command-line entry point: write a unified diff of two files to stdout.

    :param args: The command-line arguments, without the program name.
        Apart from the --patience / --difflib switches, exactly two file
        names are expected; either may be "-" to read from stdin.
    :return: -1 if the number of file names is not two, otherwise None.
    """
    import optparse

    usage_text = ('%prog [options] file_a file_b'
                  '\nFiles can be "-" to read from stdin')
    parser = optparse.OptionParser(usage=usage_text)
    # Both switches store into the same destination, so the last one given
    # on the command line wins; patience is the default.
    parser.add_option('--patience', dest='matcher', action='store_const',
                      const='patience', default='patience',
                      help='Use the patience difference algorithm')
    parser.add_option('--difflib', dest='matcher', action='store_const',
                      const='difflib', default='patience',
                      help='Use python\'s difflib algorithm')

    matcher_by_name = {
        'patience': PatienceSequenceMatcher,
        'difflib': difflib.SequenceMatcher,
    }

    (options, filenames) = parser.parse_args(args)
    chosen_matcher = matcher_by_name[options.matcher]

    if len(filenames) != 2:
        # A single parenthesised argument prints identically under the
        # Python 2 print statement.
        print('You must supply 2 filenames to diff')
        return -1

    from_name, to_name = filenames[0], filenames[1]
    for diff_line in unified_diff_files(from_name, to_name,
                                        sequencematcher=chosen_matcher):
        sys.stdout.write(diff_line)
|
160  | 
||
161  | 
||
162  | 
if __name__ == '__main__':
    # Executed as a script: run main() on the arguments that follow the
    # program name and use its return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)