/brz/remove-bazaar : revision 1185.81.24

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/cdv/nofrillsprecisemerge.py

Committer: Aaron Bentley
Date: 2006-05-23 01:07:31 UTC
mto: This revision was merged to the branch mainline in revision 1727.
Revision ID: aaron.bentley@utoronto.ca-20060523010731-8ffa480f08c46c21

Reorganize patience-related code

files removed:
bzrlib/cdv

bzrlib/cdv/__init__.py

bzrlib/cdv/nofrillsprecisemerge.py

files renamed:
bzrlib/cdv/cdvdifflib.py => bzrlib/patiencediff.py

files modified:
bzrlib/diff.py

bzrlib/merge3.py

bzrlib/tests/test_diff.py

bzrlib/weave.py

Show diffs side-by-side

added added

removed removed

bzrlib/cdv/nofrillsprecisemerge.py

from sets import Set as set

from copy import copy

from bisect import bisect

def unique_lcs(a, b):
    """Find the longest common subset for unique lines.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subset uses the Patience Sorting algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting

    Lines are used as dictionary keys, so they must be hashable.
    """
    # index[line] = position of line in a, unless the line occurs more than
    # once in a, in which case it is set to None
    index = {}
    for apos, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = apos

    # btoa[i] = position in a of the line found at b[i], unless that line
    # doesn't occur exactly once in both, in which case it stays None
    btoa = [None] * len(b)
    first_in_b = {}
    for bpos, line in enumerate(b):
        apos = index.get(line)
        if apos is None:
            # absent from a, duplicated in a, or already known to be
            # duplicated in b
            continue
        if line in first_in_b:
            # unset the previous mapping, which we now know to be invalid
            # because the line isn't unique in b; dropping it from index
            # makes any further occurrence fall into the branch above
            btoa[first_in_b[line]] = None
            del index[line]
        else:
            first_in_b[line] = bpos
            btoa[bpos] = apos

    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # stacks[k]: position in a on top of pile k (kept in increasing order)
    # lasts[k]: position in b of the entry currently on top of pile k
    # backpointers[bpos]: position in b of the previous element of the
    #                     chain ending at bpos, or None at the chain start
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (
                k == len(stacks) - 1 or stacks[k + 1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k - 1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)

    if not lasts:
        return []

    # walk the chain backwards from the top of the last pile
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

# Import-time self-checks for unique_lcs. The inputs are strings, so each
# character plays the role of one "line".

# Two empty sequences have nothing in common.
assert unique_lcs('', '') == []

# A single identical element matches itself.
assert unique_lcs('a', 'a') == [(0, 0)]

# Different single elements produce no match.
assert unique_lcs('a', 'b') == []

# Identical sequences match position by position.
assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]

# When blocks are swapped, the longer run ('cde') is kept over 'ab'.
assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]

# Same as above with the arguments exchanged.
assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]

# A differing element in the middle is skipped; both sides of it match.
assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]

# 'a' and 'c' are repeated in the first argument, so only 'b' can match.
assert unique_lcs('acbac', 'abc') == [(2, 1)]

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b), ...]. If it is not
                   empty, the search starts just after its last entry.
    :param maxrecursion: The maximum depth to recurse.
                         Must be a positive integer. Once it drops below
                         zero the call returns without adding anything.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        # this will never happen normally, this check is to prevent DOS
        # attacks
        return
    oldlength = len(answer)
    # resume right after the last recorded match, or from the very start
    if oldlength == 0:
        alo, blo = 0, 0
    else:
        alo, blo = answer[-1]
        alo += 1
        blo += 1
    # nothing left to compare on at least one side; '>=' rather than '=='
    # so that a start position past the limit never reaches the indexing
    # below
    if alo >= ahi or blo >= bhi:
        return
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        # (unique_lcs positions are relative to the slices, so shift them)
        apos += alo
        bpos += blo
        recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # match what lies before the common tail first so that answer
        # stays in increasing order, then record the tail itself
        recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
        for i in range(ahi - nahi):
            answer.append((nahi + i, nbhi + i))

# Import-time self-checks for recurse_matches. Each case passes a fresh
# list, which the call fills in place.

a1 = []
recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)
assert a1 == [(0, 0), (2, 2), (4, 4)]

a2 = []
recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)
assert a2 == [(0, 0), (2, 1), (4, 2)]

a3 = []
recurse_matches(['a', 'B', 'c', 'c', 'D', 'e'], ['a', 'b', 'c', 'c', 'd', 'e'], 6, 6, a3, 10)
# FIXME: recurse_matches won't match non-unique lines, surrounded by bogus text
# This is what it should be
#assert a3 == [(0,0), (2,2), (3,3), (5,5)]
# This is what it currently gives:
assert a3 == [(0,0), (5,5)]

Older »