bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1185.81.14
by John Arbash Meinel
 Added a main function for running cdvdifflib manually, included tests for unified_diff interfaces  | 
1  | 
#!/usr/bin/env python
 | 
| 
1185.81.24
by Aaron Bentley
 Reoganize patience-related code  | 
2  | 
# Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
 | 
3  | 
#
 | 
|
| 
1185.81.1
by John Arbash Meinel
 Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.  | 
4  | 
# This program is free software; you can redistribute it and/or modify
 | 
5  | 
# it under the terms of the GNU General Public License as published by
 | 
|
6  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
7  | 
# (at your option) any later version.
 | 
|
| 
1185.81.24
by Aaron Bentley
 Reoganize patience-related code  | 
8  | 
#
 | 
| 
1185.81.1
by John Arbash Meinel
 Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.  | 
9  | 
# This program is distributed in the hope that it will be useful,
 | 
10  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
11  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
12  | 
# GNU General Public License for more details.
 | 
|
| 
1185.81.24
by Aaron Bentley
 Reoganize patience-related code  | 
13  | 
#
 | 
| 
1185.81.1
by John Arbash Meinel
 Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.  | 
14  | 
# You should have received a copy of the GNU General Public License
 | 
15  | 
# along with this program; if not, write to the Free Software
 | 
|
16  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
17  | 
||
18  | 
||
| 
1185.81.24
by Aaron Bentley
 Reoganize patience-related code  | 
19  | 
from bisect import bisect  | 
| 
1185.81.1
by John Arbash Meinel
 Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.  | 
20  | 
import difflib  | 
| 
1185.81.14
by John Arbash Meinel
 Added a main function for running cdvdifflib manually, included tests for unified_diff interfaces  | 
21  | 
import os  | 
22  | 
import sys  | 
|
| 
1185.81.29
by Aaron Bentley
 Fix style issues and duplicated tests  | 
23  | 
import time  | 
24  | 
||
| 
1711.2.12
by John Arbash Meinel
 Make a mention when the maximum recursion length is reached.  | 
25  | 
from bzrlib.trace import mutter  | 
26  | 
||
| 
1185.81.1
by John Arbash Meinel
 Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.  | 
27  | 
|
| 
1711.2.11
by John Arbash Meinel
 Rename patiencediff.SequenceMatcher => PatienceSequenceMatcher and knit.SequenceMatcher => KnitSequenceMatcher  | 
28  | 
__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']  | 
| 
1185.81.9
by John Arbash Meinel
 Added (failing) tests for cdv.recurse_matches with common sections,  | 
29  | 
|
| 
1185.81.29
by Aaron Bentley
 Fix style issues and duplicated tests  | 
30  | 
|
| 
1185.81.24
by Aaron Bentley
 Reoganize patience-related code  | 
31  | 
def unique_lcs(a, b):
    """Find the longest common subset for unique lines.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subset uses the Patience Sorting algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # set index[line in a] = position of line in a unless
    # a is a duplicate, in which case it's set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i
    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        a_pos = index.get(line)
        if a_pos is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                # dropping the entry makes any further occurrence of
                # this line in b miss the lookup above entirely
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = a_pos
    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # backpointers[bpos] = b position of the match preceding bpos in the
    # chain that ends at bpos (None when bpos starts a chain)
    backpointers = [None] * len(b)
    # stacks[k] = a position currently on top of pile k (kept increasing)
    stacks = []
    # lasts[k] = b position of the entry currently on top of pile k
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)
    if len(lasts) == 0:
        return []
    # walk the backpointer chain from the top of the last pile, then
    # reverse it so the matches come out in increasing order
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result
|
107  | 
||
108  | 
||
| 
1711.2.22
by John Arbash Meinel
 Passing the alo parameter to recurse_matches shaves of 5% of the diff time.  | 
109  | 
def recurse_matches(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param alo: The start location of a to check, typically 0
    :param blo: The start location of b to check, typically 0
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b)]
    :param maxrecursion: The maximum depth to recurse.
                         Once it drops below zero the call logs a note
                         via mutter() and returns without adding matches.
    :return: None, the return value is in the parameter answer, which
             should be a list

    """
    if maxrecursion < 0:
        mutter('max recursion depth reached')
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if alo == ahi or blo == bhi:
        return
    last_a_pos = alo-1
    last_b_pos = blo-1
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        # (unique_lcs works on the slices, so shift back to absolute offsets)
        apos += alo
        bpos += blo
        # Most of the time, you will have a sequence of similar entries
        if last_a_pos+1 != apos or last_b_pos+1 != bpos:
            recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                            apos, bpos, answer, maxrecursion - 1)
        last_a_pos = apos
        last_b_pos = bpos
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, alo, blo,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # match whatever lies before the common tail first, so that
        # answer stays in increasing order when the tail is appended
        recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                        nahi, nbhi, answer, maxrecursion - 1)
        for i in range(ahi - nahi):
            answer.append((nahi + i, nbhi + i))
|
169  | 
||
170  | 
||
| 
1711.2.21
by John Arbash Meinel
 Cleanup patiencediff, remove the use of difflib.SequenceMatcher.  | 
171  | 
def _collapse_sequences(matches):  | 
172  | 
"""Find sequences of lines.  | 
|
173  | 
||
174  | 
    Given a sequence of [(line_in_a, line_in_b),]
 | 
|
175  | 
    find regions where they both increment at the same time
 | 
|
176  | 
    """
 | 
|
177  | 
answer = []  | 
|
178  | 
start_a = start_b = None  | 
|
179  | 
length = 0  | 
|
180  | 
for i_a, i_b in matches:  | 
|
181  | 
if (start_a is not None  | 
|
182  | 
and (i_a == start_a + length)  | 
|
183  | 
and (i_b == start_b + length)):  | 
|
184  | 
length += 1  | 
|
185  | 
else:  | 
|
186  | 
if start_a is not None:  | 
|
187  | 
answer.append((start_a, start_b, length))  | 
|
188  | 
start_a = i_a  | 
|
189  | 
start_b = i_b  | 
|
190  | 
length = 1  | 
|
191  | 
||
192  | 
if length != 0:  | 
|
193  | 
answer.append((start_a, start_b, length))  | 
|
194  | 
||
195  | 
return answer  | 
|
196  | 
||
197  | 
||
198  | 
def _check_consistency(answer):  | 
|
199  | 
    # For consistency sake, make sure all matches are only increasing
 | 
|
200  | 
next_a = -1  | 
|
201  | 
next_b = -1  | 
|
202  | 
for a,b,match_len in answer:  | 
|
203  | 
assert a >= next_a, 'Non increasing matches for a'  | 
|
204  | 
assert b >= next_b, 'Not increasing matches for b'  | 
|
205  | 
next_a = a + match_len  | 
|
206  | 
next_b = b + match_len  | 
|
207  | 
||
208  | 
||
| 
1711.2.11
by John Arbash Meinel
 Rename patiencediff.SequenceMatcher => PatienceSequenceMatcher and knit.SequenceMatcher => KnitSequenceMatcher  | 
209  | 
class PatienceSequenceMatcher(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subset."""

    # When true (and asserts are enabled), get_matching_blocks() verifies
    # that the blocks it computed are monotonically increasing.
    _do_check_consistency = True

    def __init__(self, isjunk=None, a='', b=''):
        """Create a matcher for sequences a and b.

        :param isjunk: Must be None; junk filtering is not supported.
        :param a: The first sequence.
        :param b: The second sequence.
        :raises NotImplementedError: if isjunk is not None.
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches

        # the result is cached on the instance after the first call
        if self.matching_blocks is not None:
            return self.matching_blocks

        matches = []
        # 10 is the maximum recursion depth handed to recurse_matches
        recurse_matches(self.a, self.b, 0, 0,
                        len(self.a), len(self.b), matches, 10)
        # Matches now has individual line pairs of
        # line A matches line B, at the given offsets
        self.matching_blocks = _collapse_sequences(matches)
        self.matching_blocks.append( (len(self.a), len(self.b), 0) )
        if PatienceSequenceMatcher._do_check_consistency:
            if __debug__:
                _check_consistency(self.matching_blocks)

        return self.matching_blocks
|
| 
1185.81.16
by John Arbash Meinel
 Added tests, and an assert check to make sure ranges are always increasing.  | 
254  | 
|
| 
1185.81.29
by Aaron Bentley
 Fix style issues and duplicated tests  | 
255  | 
|
| 
1185.81.8
by John Arbash Meinel
 Updating unified_diff to take a factory, using the new diff algorithm in the code.  | 
256  | 
# This is a version of unified_diff which only adds a factory parameter
 | 
257  | 
# so that you can override the default SequenceMatcher
 | 
|
258  | 
# this has been submitted as a patch to python
 | 
|
259  | 
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  It defaults
    to difflib.SequenceMatcher.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # the file header is emitted lazily, so identical inputs yield nothing
    started = False
    for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        # the hunk spans from the start of its first opcode to the end
        # of its last one
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # a 'replace' emits the removed lines, then the added ones
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line
|
322  | 
||
| 
1185.81.29
by Aaron Bentley
 Fix style issues and duplicated tests  | 
323  | 
|
| 
1185.81.14
by John Arbash Meinel
 Added a main function for running cdvdifflib manually, included tests for unified_diff interfaces  | 
324  | 
def _read_file_lines(path):
    """Return the lines of path as a list; '-' means standard input."""
    if path == '-':
        # stdin is not ours to close
        return sys.stdin.readlines()
    f = open(path, 'rb')
    try:
        return f.readlines()
    finally:
        # always release the handle, even if reading fails
        f.close()


def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the first file, or '-' to read standard input.
    :param b: Path of the second file, or '-' to read standard input.
    :param sequencematcher: Passed through to unified_diff() unchanged.
    :return: An empty list when a and b are the same name, otherwise
             whatever unified_diff() returns for the two files' lines.
    """
    # Should this actually be an error?
    if a == b:
        return []

    # Both files are read in full, and closed, before the diff is built.
    lines_a = _read_file_lines(a)
    lines_b = _read_file_lines(b)

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)
|
348  | 
||
| 
1185.81.29
by Aaron Bentley
 Fix style issues and duplicated tests  | 
349  | 
|
| 
1185.81.14
by John Arbash Meinel
 Added a main function for running cdvdifflib manually, included tests for unified_diff interfaces  | 
350  | 
def main(args):
    """Diff two files named on the command line and write the result.

    :param args: Command-line arguments, without the program name.
    :return: -1 when exactly two filenames were not supplied; otherwise
             None after the diff has been written to standard output.
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    # both options store into 'matcher'; patience is the default
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}

    (opts, args) = p.parse_args(args)
    matcher = algorithms[opts.matcher]

    if len(args) != 2:
        # written via sys.stdout so the module parses on Python 2 and 3
        sys.stdout.write('You must supply 2 filenames to diff\n')
        return -1

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))