1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
23
def intersect(ra, rb):
24
"""Given two ranges return the range where they intersect or None.
26
>>> intersect((0, 10), (0, 6))
28
>>> intersect((0, 10), (5, 15))
30
>>> intersect((0, 10), (10, 15))
31
>>> intersect((0, 9), (10, 15))
32
>>> intersect((0, 9), (7, 15))
38
sa = max(ra[0], rb[0])
39
sb = min(ra[1], rb[1])
46
def compare_range(a, astart, aend, b, bstart, bend):
47
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
49
if (aend-astart) != (bend-bstart):
51
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
61
"""3-way merge of texts.
63
Given BASE, OTHER, THIS, tries to produce a combined text
64
incorporating the changes from both BASE->OTHER and BASE->THIS.
65
All three will typically be sequences of lines."""
66
def __init__(self, base, a, b):
70
from difflib import SequenceMatcher
71
self.a_ops = SequenceMatcher(None, base, a).get_opcodes()
72
self.b_ops = SequenceMatcher(None, base, b).get_opcodes()
80
start_marker='<<<<<<<',
84
"""Return merge in cvs-like form.
87
start_marker = start_marker + ' ' + name_a
89
end_marker = end_marker + ' ' + name_b
90
if name_base and base_marker:
91
base_marker = base_marker + ' ' + name_base
93
for t in self.merge_regions():
95
if what == 'unchanged':
96
for i in range(t[1], t[2]):
98
elif what == 'a' or what == 'same':
99
for i in range(t[1], t[2]):
102
for i in range(t[1], t[2]):
104
elif what == 'conflict':
105
yield start_marker + '\n'
106
for i in range(t[3], t[4]):
108
if base_marker is not None:
109
yield base_marker + '\n'
110
for i in range(t[1], t[2]):
112
yield mid_marker + '\n'
113
for i in range(t[5], t[6]):
115
yield end_marker + '\n'
117
raise ValueError(what)
123
def merge_annotated(self):
124
"""Return merge with conflicts, showing origin of lines.
126
Most useful for debugging merge.
128
for t in self.merge_regions():
130
if what == 'unchanged':
131
for i in range(t[1], t[2]):
132
yield 'u | ' + self.base[i]
133
elif what == 'a' or what == 'same':
134
for i in range(t[1], t[2]):
135
yield what[0] + ' | ' + self.a[i]
137
for i in range(t[1], t[2]):
138
yield 'b | ' + self.b[i]
139
elif what == 'conflict':
141
for i in range(t[3], t[4]):
142
yield 'A | ' + self.a[i]
144
for i in range(t[5], t[6]):
145
yield 'B | ' + self.b[i]
148
raise ValueError(what)
154
def merge_groups(self):
155
"""Yield sequence of line groups. Each one is a tuple:
158
Lines unchanged from base
164
Lines taken from a (and equal to b)
169
'conflict', base_lines, a_lines, b_lines
170
Lines from base were changed to either a or b and conflict.
172
for t in self.merge_regions():
174
if what == 'unchanged':
175
yield what, self.base[t[1]:t[2]]
176
elif what == 'a' or what == 'same':
177
yield what, self.a[t[1]:t[2]]
179
yield what, self.b[t[1]:t[2]]
180
elif what == 'conflict':
182
self.base[t[1]:t[2]],
186
raise ValueError(what)
189
def merge_regions(self):
190
"""Return sequences of matching and conflicting regions.
192
This returns tuples, where the first value says what kind we
195
'unchanged', start, end
196
Take a region of base[start:end]
199
b and a are different from base but give the same result
202
Non-clashing insertion from a[start:end]
204
Method is as follows:
206
The two sequences align only on regions which match the base
207
and both descendents. These are found by doing a two-way diff
208
of each one against the base, and then finding the
209
intersections between those regions. These "sync regions"
210
are by definition unchanged in both and easily dealt with.
212
The regions in between can be in any of three cases:
213
conflicted, or changed on only one side.
216
# section a[0:ia] has been disposed of, etc
219
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
220
#print 'match base [%d:%d]' % (zmatch, zend)
222
matchlen = zend - zmatch
224
assert matchlen == (aend - amatch)
225
assert matchlen == (bend - bmatch)
229
len_base = zmatch - iz
234
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
237
# try to avoid actually slicing the lists
238
equal_a = compare_range(self.a, ia, amatch,
239
self.base, iz, zmatch)
240
equal_b = compare_range(self.b, ib, bmatch,
241
self.base, iz, zmatch)
242
same = compare_range(self.a, ia, amatch,
246
yield 'same', ia, amatch
247
elif equal_a and not equal_b:
248
yield 'b', ib, bmatch
249
elif equal_b and not equal_a:
250
yield 'a', ia, amatch
251
elif not equal_a and not equal_b:
252
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
254
raise AssertionError("can't handle a=b=base but unmatched")
260
# if the same part of the base was deleted on both sides
261
# that's OK, we can just skip it.
269
yield 'unchanged', zmatch, zend
276
def find_sync_regions(self):
277
"""Return a list of sync regions, where both descendents match the base.
279
Generates a list of (base1, base2, a1, a2, b1, b2). There is
280
always a zero-length sync region at the end of all the files.
282
from difflib import SequenceMatcher
285
amatches = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
286
bmatches = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
287
len_a = len(amatches)
288
len_b = len(bmatches)
292
while ia < len_a and ib < len_b:
293
abase, amatch, alen = amatches[ia]
294
bbase, bmatch, blen = bmatches[ib]
296
# there is an unconflicted block at i; how long does it
297
# extend? until whichever one ends earlier.
298
i = intersect((abase, abase+alen), (bbase, bbase+blen))
302
intlen = intend - intbase
304
# found a match of base[i[0], i[1]]; this may be less than
305
# the region that matches in either one
306
assert intlen <= alen
307
assert intlen <= blen
308
assert abase <= intbase
309
assert bbase <= intbase
311
asub = amatch + (intbase - abase)
312
bsub = bmatch + (intbase - bbase)
316
assert self.base[intbase:intend] == self.a[asub:aend], \
317
(self.base[intbase:intend], self.a[asub:aend])
319
assert self.base[intbase:intend] == self.b[bsub:bend]
321
sl.append((intbase, intend,
325
# advance whichever one ends first in the base text
326
if (abase + alen) < (bbase + blen):
331
intbase = len(self.base)
334
sl.append((intbase, intbase, abase, abase, bbase, bbase))
340
def find_unconflicted(self):
341
"""Return a list of ranges in base that are not conflicted."""
342
from difflib import SequenceMatcher
346
# don't sync-up on lines containing only blanks or pounds
347
junk_re = re.compile(r'^[ \t#]*$')
349
am = SequenceMatcher(junk_re.match, self.base, self.a).get_matching_blocks()
350
bm = SequenceMatcher(junk_re.match, self.base, self.b).get_matching_blocks()
355
# there is an unconflicted block at i; how long does it
356
# extend? until whichever one ends earlier.
361
i = intersect((a1, a2), (b1, b2))
374
# as for diff3 and meld the syntax is "MINE BASE OTHER"
375
a = file(argv[1], 'rt').readlines()
376
base = file(argv[2], 'rt').readlines()
377
b = file(argv[3], 'rt').readlines()
379
m3 = Merge3(base, a, b)
381
#for sr in m3.find_sync_regions():
384
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
385
sys.stdout.writelines(m3.merge_annotated())
388
if __name__ == '__main__':
390
sys.exit(main(sys.argv))