1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
22
from difflib import SequenceMatcher
24
def intersect(ra, rb):
25
"""Given two ranges return the range where they intersect or None.
27
>>> intersect((0, 10), (0, 6))
29
>>> intersect((0, 10), (5, 15))
31
>>> intersect((0, 10), (10, 15))
32
>>> intersect((0, 9), (10, 15))
33
>>> intersect((0, 9), (7, 15))
39
sa = max(ra[0], rb[0])
40
sb = min(ra[1], rb[1])
47
def compare_range(a, astart, aend, b, bstart, bend):
48
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
50
if (aend-astart) != (bend-bstart):
52
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
62
"""3-way merge of texts.
64
Given BASE, OTHER, THIS, tries to produce a combined text
65
incorporating the changes from both BASE->OTHER and BASE->THIS.
66
All three will typically be sequences of lines."""
67
def __init__(self, base, a, b):
78
start_marker='<<<<<<<',
82
"""Return merge in cvs-like form.
85
start_marker = start_marker + ' ' + name_a
87
end_marker = end_marker + ' ' + name_b
88
if name_base and base_marker:
89
base_marker = base_marker + ' ' + name_base
91
for t in self.merge_regions():
93
if what == 'unchanged':
94
for i in range(t[1], t[2]):
96
elif what == 'a' or what == 'same':
97
for i in range(t[1], t[2]):
100
for i in range(t[1], t[2]):
102
elif what == 'conflict':
103
yield start_marker + '\n'
104
for i in range(t[3], t[4]):
106
if base_marker is not None:
107
yield base_marker + '\n'
108
for i in range(t[1], t[2]):
110
yield mid_marker + '\n'
111
for i in range(t[5], t[6]):
113
yield end_marker + '\n'
115
raise ValueError(what)
121
def merge_annotated(self):
122
"""Return merge with conflicts, showing origin of lines.
124
Most useful for debugging merge.
126
for t in self.merge_regions():
128
if what == 'unchanged':
129
for i in range(t[1], t[2]):
130
yield 'u | ' + self.base[i]
131
elif what == 'a' or what == 'same':
132
for i in range(t[1], t[2]):
133
yield what[0] + ' | ' + self.a[i]
135
for i in range(t[1], t[2]):
136
yield 'b | ' + self.b[i]
137
elif what == 'conflict':
139
for i in range(t[3], t[4]):
140
yield 'A | ' + self.a[i]
142
for i in range(t[5], t[6]):
143
yield 'B | ' + self.b[i]
146
raise ValueError(what)
152
def merge_groups(self):
153
"""Yield sequence of line groups. Each one is a tuple:
156
Lines unchanged from base
162
Lines taken from a (and equal to b)
167
'conflict', base_lines, a_lines, b_lines
168
Lines from base were changed to either a or b and conflict.
170
for t in self.merge_regions():
172
if what == 'unchanged':
173
yield what, self.base[t[1]:t[2]]
174
elif what == 'a' or what == 'same':
175
yield what, self.a[t[1]:t[2]]
177
yield what, self.b[t[1]:t[2]]
178
elif what == 'conflict':
180
self.base[t[1]:t[2]],
184
raise ValueError(what)
187
def merge_regions(self):
188
"""Return sequences of matching and conflicting regions.
190
This returns tuples, where the first value says what kind we
193
'unchanged', start, end
194
Take a region of base[start:end]
197
b and a are different from base but give the same result
200
Non-clashing insertion from a[start:end]
202
Method is as follows:
204
The two sequences align only on regions which match the base
205
and both descendents. These are found by doing a two-way diff
206
of each one against the base, and then finding the
207
intersections between those regions. These "sync regions"
208
are by definition unchanged in both and easily dealt with.
210
The regions in between can be in any of three cases:
211
conflicted, or changed on only one side.
214
# section a[0:ia] has been disposed of, etc
217
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
218
#print 'match base [%d:%d]' % (zmatch, zend)
220
matchlen = zend - zmatch
222
assert matchlen == (aend - amatch)
223
assert matchlen == (bend - bmatch)
227
len_base = zmatch - iz
232
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
235
# try to avoid actually slicing the lists
236
equal_a = compare_range(self.a, ia, amatch,
237
self.base, iz, zmatch)
238
equal_b = compare_range(self.b, ib, bmatch,
239
self.base, iz, zmatch)
240
same = compare_range(self.a, ia, amatch,
244
yield 'same', ia, amatch
245
elif equal_a and not equal_b:
246
yield 'b', ib, bmatch
247
elif equal_b and not equal_a:
248
yield 'a', ia, amatch
249
elif not equal_a and not equal_b:
250
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
252
raise AssertionError("can't handle a=b=base but unmatched")
258
# if the same part of the base was deleted on both sides
259
# that's OK, we can just skip it.
267
yield 'unchanged', zmatch, zend
274
def find_sync_regions(self):
275
"""Return a list of sync regions, where both descendents match the base.
277
Generates a list of (base1, base2, a1, a2, b1, b2). There is
278
always a zero-length sync region at the end of all the files.
282
amatches = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
283
bmatches = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
284
len_a = len(amatches)
285
len_b = len(bmatches)
289
while ia < len_a and ib < len_b:
290
abase, amatch, alen = amatches[ia]
291
bbase, bmatch, blen = bmatches[ib]
293
# there is an unconflicted block at i; how long does it
294
# extend? until whichever one ends earlier.
295
i = intersect((abase, abase+alen), (bbase, bbase+blen))
299
intlen = intend - intbase
301
# found a match of base[i[0], i[1]]; this may be less than
302
# the region that matches in either one
303
assert intlen <= alen
304
assert intlen <= blen
305
assert abase <= intbase
306
assert bbase <= intbase
308
asub = amatch + (intbase - abase)
309
bsub = bmatch + (intbase - bbase)
313
assert self.base[intbase:intend] == self.a[asub:aend], \
314
(self.base[intbase:intend], self.a[asub:aend])
316
assert self.base[intbase:intend] == self.b[bsub:bend]
318
sl.append((intbase, intend,
322
# advance whichever one ends first in the base text
323
if (abase + alen) < (bbase + blen):
328
intbase = len(self.base)
331
sl.append((intbase, intbase, abase, abase, bbase, bbase))
337
def find_unconflicted(self):
338
"""Return a list of ranges in base that are not conflicted."""
342
# don't sync-up on lines containing only blanks or pounds
343
junk_re = re.compile(r'^[ \t#]*$')
345
am = SequenceMatcher(junk_re.match, self.base, self.a).get_matching_blocks()
346
bm = SequenceMatcher(junk_re.match, self.base, self.b).get_matching_blocks()
351
# there is an unconflicted block at i; how long does it
352
# extend? until whichever one ends earlier.
357
i = intersect((a1, a2), (b1, b2))
370
# as for diff3 and meld the syntax is "MINE BASE OTHER"
371
a = file(argv[1], 'rt').readlines()
372
base = file(argv[2], 'rt').readlines()
373
b = file(argv[3], 'rt').readlines()
375
m3 = Merge3(base, a, b)
377
#for sr in m3.find_sync_regions():
380
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
381
sys.stdout.writelines(m3.merge_annotated())
384
if __name__ == '__main__':
386
sys.exit(main(sys.argv))