1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
23
def intersect(ra, rb):
24
"""Given two ranges return the range where they intersect or None.
26
>>> intersect((0, 10), (0, 6))
28
>>> intersect((0, 10), (5, 15))
30
>>> intersect((0, 10), (10, 15))
31
>>> intersect((0, 9), (10, 15))
32
>>> intersect((0, 9), (7, 15))
38
sa = max(ra[0], rb[0])
39
sb = min(ra[1], rb[1])
48
"""3-way merge of texts.
50
Given BASE, OTHER, THIS, tries to produce a combined text
51
incorporating the changes from both BASE->OTHER and BASE->THIS.
52
All three will typically be sequences of lines."""
53
def __init__(self, base, a, b):
57
from difflib import SequenceMatcher
58
self.a_ops = SequenceMatcher(None, base, a).get_opcodes()
59
self.b_ops = SequenceMatcher(None, base, b).get_opcodes()
66
start_marker='<<<<<<<<',
67
mid_marker='========',
68
end_marker='>>>>>>>>',
70
"""Return merge in cvs-like form.
73
start_marker = start_marker + ' ' + name_a
75
end_marker = end_marker + ' ' + name_b
77
for t in self.merge_regions():
79
if what == 'unchanged':
80
for i in range(t[1], t[2]):
82
elif what == 'a' or what == 'same':
83
for i in range(t[1], t[2]):
86
for i in range(t[1], t[2]):
88
elif what == 'conflict':
89
yield start_marker + '\n'
90
for i in range(t[3], t[4]):
92
yield mid_marker + '\n'
93
for i in range(t[5], t[6]):
95
yield end_marker + '\n'
97
raise ValueError(what)
103
def merge_annotated(self):
104
"""Return merge with conflicts, showing origin of lines.
106
Most useful for debugging merge.
108
for t in self.merge_regions():
110
if what == 'unchanged':
111
for i in range(t[1], t[2]):
112
yield 'u | ' + self.base[i]
113
elif what == 'a' or what == 'same':
114
for i in range(t[1], t[2]):
115
yield what[0] + ' | ' + self.a[i]
117
for i in range(t[1], t[2]):
118
yield 'b | ' + self.b[i]
119
elif what == 'conflict':
121
for i in range(t[3], t[4]):
122
yield 'A | ' + self.a[i]
124
for i in range(t[5], t[6]):
125
yield 'B | ' + self.b[i]
128
raise ValueError(what)
134
def merge_groups(self):
135
"""Yield sequence of line groups. Each one is a tuple:
138
Lines unchanged from base
144
Lines taken from a (and equal to b)
149
'conflict', base_lines, a_lines, b_lines
150
Lines from base were changed to either a or b and conflict.
152
for t in self.merge_regions():
154
if what == 'unchanged':
155
yield what, self.base[t[1]:t[2]]
156
elif what == 'a' or what == 'same':
157
yield what, self.a[t[1]:t[2]]
159
yield what, self.b[t[1]:t[2]]
160
elif what == 'conflict':
162
self.base[t[1]:t[2]],
166
raise ValueError(what)
169
def merge_regions(self):
170
"""Return sequences of matching and conflicting regions.
172
This returns tuples, where the first value says what kind we
175
'unchanged', start, end
176
Take a region of base[start:end]
179
b and a are different from base but give the same result
182
Non-clashing insertion from a[start:end]
184
Method is as follows:
186
The two sequences align only on regions which match the base
187
and both descendents. These are found by doing a two-way diff
188
of each one against the base, and then finding the
189
intersections between those regions. These "sync regions"
190
are by definition unchanged in both and easily dealt with.
192
The regions in between can be in any of three cases:
193
conflicted, or changed on only one side.
196
# section a[0:ia] has been disposed of, etc
199
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
200
matchlen = zend - zmatch
202
assert matchlen == (aend - amatch)
203
assert matchlen == (bend - bmatch)
207
len_base = zmatch - iz
213
lines_base = self.base[iz:zmatch]
214
lines_a = self.a[ia:amatch]
215
lines_b = self.b[ib:bmatch]
217
# we check the len just as a shortcut
218
equal_a = (len_a == len_base
219
and lines_a == lines_base)
220
equal_b = (len_b == len_base
221
and lines_b == lines_base)
222
same = (len_a == len_b
223
and lines_a == lines_b)
226
yield 'same', ia, amatch
227
elif equal_a and not equal_b:
228
yield 'b', ib, bmatch
229
elif equal_b and not equal_a:
230
yield 'a', ia, amatch
231
elif not equal_a and not equal_b:
232
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
240
# if the same part of the base was deleted on both sides
241
# that's OK, we can just skip it.
249
yield 'unchanged', zmatch, zend
256
def find_sync_regions(self):
257
"""Return a list of sync regions, where both descendents match the base.
259
Generates a list of (base1, base2, a1, a2, b1, b2). There is
260
always a zero-length sync region at the end of all the files.
262
from difflib import SequenceMatcher
263
aiter = iter(SequenceMatcher(None, self.base, self.a).get_matching_blocks())
264
biter = iter(SequenceMatcher(None, self.base, self.b).get_matching_blocks())
266
abase, amatch, alen = aiter.next()
267
bbase, bmatch, blen = biter.next()
269
while aiter and biter:
270
# there is an unconflicted block at i; how long does it
271
# extend? until whichever one ends earlier.
272
i = intersect((abase, abase+alen), (bbase, bbase+blen))
276
intlen = intend - intbase
278
# found a match of base[i[0], i[1]]; this may be less than
279
# the region that matches in either one
280
assert intlen <= alen
281
assert intlen <= blen
282
assert abase <= intbase
283
assert bbase <= intbase
285
asub = amatch + (intbase - abase)
286
bsub = bmatch + (intbase - bbase)
290
assert self.base[intbase:intend] == self.a[asub:aend], \
291
(self.base[intbase:intend], self.a[asub:aend])
293
assert self.base[intbase:intend] == self.b[bsub:bend]
295
yield (intbase, intend,
299
# advance whichever one ends first in the base text
300
if (abase + alen) < (bbase + blen):
301
abase, amatch, alen = aiter.next()
303
bbase, bmatch, blen = biter.next()
305
intbase = len(self.base)
308
yield (intbase, intbase, abase, abase, bbase, bbase)
312
def find_unconflicted(self):
313
"""Return a list of ranges in base that are not conflicted."""
314
from difflib import SequenceMatcher
318
# don't sync-up on lines containing only blanks or pounds
319
junk_re = re.compile(r'^[ \t#]*$')
321
am = SequenceMatcher(junk_re.match, self.base, self.a).get_matching_blocks()
322
bm = SequenceMatcher(junk_re.match, self.base, self.b).get_matching_blocks()
327
# there is an unconflicted block at i; how long does it
328
# extend? until whichever one ends earlier.
333
i = intersect((a1, a2), (b1, b2))
346
# as for diff3 and meld the syntax is "MINE BASE OTHER"
347
a = file(argv[1], 'rt').readlines()
348
base = file(argv[2], 'rt').readlines()
349
b = file(argv[3], 'rt').readlines()
351
m3 = Merge3(base, a, b)
353
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
354
sys.stdout.writelines(m3.merge_annotated())
357
if __name__ == '__main__':
359
sys.exit(main(sys.argv))