80
82
# as an optimization, check if the next line comes right after
81
83
# the previous line, because usually it does
82
elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):
84
elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
85
88
k = bisect(stacks, apos)
104
assert unique_lcs('', '') == []
105
assert unique_lcs('a', 'a') == [(0, 0)]
106
assert unique_lcs('a', 'b') == []
107
assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]
108
assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]
109
assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]
110
assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]
111
assert unique_lcs('acbac', 'abc') == [(2, 1)]
113
108
def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
114
109
"""Find all of the matching text in the lines of a and b.
165
160
for i in xrange(ahi - nahi):
166
161
answer.append((nahi + i, nbhi + i))
169
recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)
170
assert a1 == [(0, 0), (2, 2), (4, 4)]
172
recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)
173
assert a2 == [(0, 0), (2, 1), (4, 2)]
176
recurse_matches(['a', 'B', 'c', 'c', 'D', 'e'], ['a', 'b', 'c', 'c', 'd', 'e'], 6, 6, a3, 10)
177
# FIXME: recurse_matches won't match non-unique lines that are surrounded by non-matching text
178
# This is what it should be
179
#assert a3 == [(0,0), (2,2), (3,3), (5,5)]
180
# This is what it currently gives:
181
assert a3 == [(0,0), (5,5)]
184
164
class SequenceMatcher(difflib.SequenceMatcher):
185
165
"""Compare a pair of sequences using longest common subset."""
245
225
if start_a is None:
246
226
# We need to check from 0,0 until the current match
247
self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo, answer)
227
self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo,
249
230
answer.append((start_a+alo, start_b+blo, length))
250
231
self._check_with_diff(start_a+alo+length, i_a+alo,
274
255
next_a = a + match_len
275
256
next_b = b + match_len
277
259
# This is a version of unified_diff which only adds a factory parameter
278
260
# so that you can override the default SequenceMatcher
279
261
# This has been submitted as a patch to Python.
281
262
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
282
263
tofiledate='', n=3, lineterm='\n',
283
264
sequencematcher=None):