/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1185.81.14 by John Arbash Meinel
Added a main function for running cdvdifflib manually, included tests for unified_diff interfaces
1
#!/usr/bin/env python
1185.81.24 by Aaron Bentley
Reorganize patience-related code
2
# Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
3
#
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
1185.81.24 by Aaron Bentley
Reorganize patience-related code
8
#
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
1185.81.24 by Aaron Bentley
Reorganize patience-related code
13
#
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
17
6379.6.3 by Jelmer Vernooij
Use absolute_import.
18
from __future__ import absolute_import
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
19
1185.81.24 by Aaron Bentley
Reorganize patience-related code
20
from bisect import bisect
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
21
import difflib
1185.81.29 by Aaron Bentley
Fix style issues and duplicated tests
22
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
23
from .trace import mutter
1711.2.12 by John Arbash Meinel
Make a mention when the maximum recursion length is reached.
24
1185.81.1 by John Arbash Meinel
Adding nofrillsprecisemerge's diff algorithm, wrapped in difflib.
25
2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
26
def unique_lcs_py(a, b):
    """Find the longest common subset for unique lines.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subset uses the Patience Sorting algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # set index[line in a] = position of line in a unless
    # a is a duplicate, in which case it's set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i
    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        # renamed from 'next': don't shadow the builtin
        next_pos = index.get(line)
        if next_pos is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = next_pos
    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k + 1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k - 1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)
    if len(lasts) == 0:
        return []
    # walk the backpointer chain from the top of the last stack to
    # recover the longest increasing subsequence, then reverse it
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result
101
102
2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
103
def recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param alo: The start location of a to check, typically 0
    :param blo: The start location of b to check, typically 0
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b)]
    :param maxrecursion: The maximum depth to recurse.
                         Must be a positive integer.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        mutter('max recursion depth reached')
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if alo == ahi or blo == bhi:
        return
    last_a_pos = alo - 1
    last_b_pos = blo - 1
    for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        apos += alo
        bpos += blo
        # Most of the time, you will have a sequence of similar entries
        if last_a_pos + 1 != apos or last_b_pos + 1 != bpos:
            recurse_matches_py(a, b, last_a_pos + 1, last_b_pos + 1,
                               apos, bpos, answer, maxrecursion - 1)
        last_a_pos = apos
        last_b_pos = bpos
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches_py(a, b, last_a_pos + 1, last_b_pos + 1,
                           ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches_py(a, b, alo, blo,
                           ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        recurse_matches_py(a, b, last_a_pos + 1, last_b_pos + 1,
                           nahi, nbhi, answer, maxrecursion - 1)
        for i in range(ahi - nahi):
            answer.append((nahi + i, nbhi + i))
163
164
1711.2.21 by John Arbash Meinel
Cleanup patiencediff, remove the use of difflib.SequenceMatcher.
165
def _collapse_sequences(matches):
166
    """Find sequences of lines.
167
168
    Given a sequence of [(line_in_a, line_in_b),]
169
    find regions where they both increment at the same time
170
    """
171
    answer = []
172
    start_a = start_b = None
173
    length = 0
174
    for i_a, i_b in matches:
175
        if (start_a is not None
3565.3.1 by Robert Collins
* The generic fetch code now uses two attributes on Repository objects
176
            and (i_a == start_a + length)
1711.2.21 by John Arbash Meinel
Cleanup patiencediff, remove the use of difflib.SequenceMatcher.
177
            and (i_b == start_b + length)):
178
            length += 1
179
        else:
180
            if start_a is not None:
181
                answer.append((start_a, start_b, length))
182
            start_a = i_a
183
            start_b = i_b
184
            length = 1
185
186
    if length != 0:
187
        answer.append((start_a, start_b, length))
188
189
    return answer
190
191
192
def _check_consistency(answer):
193
    # For consistency sake, make sure all matches are only increasing
194
    next_a = -1
195
    next_b = -1
3376.2.8 by Martin Pool
Some review cleanups for assertion removal
196
    for (a, b, match_len) in answer:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
197
        if a < next_a:
198
            raise ValueError('Non increasing matches for a')
199
        if b < next_b:
3376.2.8 by Martin Pool
Some review cleanups for assertion removal
200
            raise ValueError('Non increasing matches for b')
1711.2.21 by John Arbash Meinel
Cleanup patiencediff, remove the use of difflib.SequenceMatcher.
201
        next_a = a + match_len
202
        next_b = b + match_len
203
204
2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
205
class PatienceSequenceMatcher_py(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subset."""

    # When True (and running without -O), every computed set of matching
    # blocks is validated with _check_consistency.
    _do_check_consistency = True

    def __init__(self, isjunk=None, a='', b=''):
        """Create a matcher for sequences *a* and *b*.

        :param isjunk: Must be None; junk detection is not supported.
        :raises NotImplementedError: if isjunk is not None.
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher_py(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches

        # Cached by a previous call (difflib resets this to None when the
        # sequences change).
        if self.matching_blocks is not None:
            return self.matching_blocks

        matches = []
        recurse_matches_py(self.a, self.b, 0, 0,
                           len(self.a), len(self.b), matches, 10)
        # Matches now has individual line pairs of
        # line A matches line B, at the given offsets
        self.matching_blocks = _collapse_sequences(matches)
        self.matching_blocks.append((len(self.a), len(self.b), 0))
        # Collapsed the redundant nested 'if' pair: the consistency check
        # only runs in debug builds and when the class flag is set.
        if __debug__ and PatienceSequenceMatcher_py._do_check_consistency:
            _check_consistency(self.matching_blocks)

        return self.matching_blocks