1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
22
from bzrlib.errors import CantReprocessAndShowBase
23
from bzrlib.patiencediff import PatienceSequenceMatcher
24
from bzrlib.textfile import check_text_lines
26
SequenceMatcher = PatienceSequenceMatcher
29
def intersect(ra, rb):
30
"""Given two ranges return the range where they intersect or None.
32
>>> intersect((0, 10), (0, 6))
34
>>> intersect((0, 10), (5, 15))
36
>>> intersect((0, 10), (10, 15))
37
>>> intersect((0, 9), (10, 15))
38
>>> intersect((0, 9), (7, 15))
44
sa = max(ra[0], rb[0])
45
sb = min(ra[1], rb[1])
52
def compare_range(a, astart, aend, b, bstart, bend):
53
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
55
if (aend-astart) != (bend-bstart):
57
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
67
"""3-way merge of texts.
69
Given BASE, OTHER, THIS, tries to produce a combined text
70
incorporating the changes from both BASE->OTHER and BASE->THIS.
71
All three will typically be sequences of lines."""
72
def __init__(self, base, a, b):
73
check_text_lines(base)
86
start_marker='<<<<<<<',
91
"""Return merge in cvs-like form.
93
if base_marker and reprocess:
94
raise CantReprocessAndShowBase()
96
start_marker = start_marker + ' ' + name_a
98
end_marker = end_marker + ' ' + name_b
99
if name_base and base_marker:
100
base_marker = base_marker + ' ' + name_base
101
merge_regions = self.merge_regions()
102
if reprocess is True:
103
merge_regions = self.reprocess_merge_regions(merge_regions)
104
for t in merge_regions:
106
if what == 'unchanged':
107
for i in range(t[1], t[2]):
109
elif what == 'a' or what == 'same':
110
for i in range(t[1], t[2]):
113
for i in range(t[1], t[2]):
115
elif what == 'conflict':
116
yield start_marker + '\n'
117
for i in range(t[3], t[4]):
119
if base_marker is not None:
120
yield base_marker + '\n'
121
for i in range(t[1], t[2]):
123
yield mid_marker + '\n'
124
for i in range(t[5], t[6]):
126
yield end_marker + '\n'
128
raise ValueError(what)
134
def merge_annotated(self):
135
"""Return merge with conflicts, showing origin of lines.
137
Most useful for debugging merge.
139
for t in self.merge_regions():
141
if what == 'unchanged':
142
for i in range(t[1], t[2]):
143
yield 'u | ' + self.base[i]
144
elif what == 'a' or what == 'same':
145
for i in range(t[1], t[2]):
146
yield what[0] + ' | ' + self.a[i]
148
for i in range(t[1], t[2]):
149
yield 'b | ' + self.b[i]
150
elif what == 'conflict':
152
for i in range(t[3], t[4]):
153
yield 'A | ' + self.a[i]
155
for i in range(t[5], t[6]):
156
yield 'B | ' + self.b[i]
159
raise ValueError(what)
165
def merge_groups(self):
166
"""Yield sequence of line groups. Each one is a tuple:
169
Lines unchanged from base
175
Lines taken from a (and equal to b)
180
'conflict', base_lines, a_lines, b_lines
181
Lines from base were changed to either a or b and conflict.
183
for t in self.merge_regions():
185
if what == 'unchanged':
186
yield what, self.base[t[1]:t[2]]
187
elif what == 'a' or what == 'same':
188
yield what, self.a[t[1]:t[2]]
190
yield what, self.b[t[1]:t[2]]
191
elif what == 'conflict':
193
self.base[t[1]:t[2]],
197
raise ValueError(what)
200
def merge_regions(self):
201
"""Return sequences of matching and conflicting regions.
203
This returns tuples, where the first value says what kind we
206
'unchanged', start, end
207
Take a region of base[start:end]
210
b and a are different from base but give the same result
213
Non-clashing insertion from a[start:end]
215
Method is as follows:
217
The two sequences align only on regions which match the base
218
and both descendents. These are found by doing a two-way diff
219
of each one against the base, and then finding the
220
intersections between those regions. These "sync regions"
221
are by definition unchanged in both and easily dealt with.
223
The regions in between can be in any of three cases:
224
conflicted, or changed on only one side.
227
# section a[0:ia] has been disposed of, etc
230
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
231
#print 'match base [%d:%d]' % (zmatch, zend)
233
matchlen = zend - zmatch
235
assert matchlen == (aend - amatch)
236
assert matchlen == (bend - bmatch)
240
len_base = zmatch - iz
245
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
248
# try to avoid actually slicing the lists
249
equal_a = compare_range(self.a, ia, amatch,
250
self.base, iz, zmatch)
251
equal_b = compare_range(self.b, ib, bmatch,
252
self.base, iz, zmatch)
253
same = compare_range(self.a, ia, amatch,
257
yield 'same', ia, amatch
258
elif equal_a and not equal_b:
259
yield 'b', ib, bmatch
260
elif equal_b and not equal_a:
261
yield 'a', ia, amatch
262
elif not equal_a and not equal_b:
263
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
265
raise AssertionError("can't handle a=b=base but unmatched")
271
# if the same part of the base was deleted on both sides
272
# that's OK, we can just skip it.
280
yield 'unchanged', zmatch, zend
286
def reprocess_merge_regions(self, merge_regions):
287
"""Where there are conflict regions, remove the agreed lines.
289
Lines where both A and B have made the same changes are
292
for region in merge_regions:
293
if region[0] != "conflict":
296
type, iz, zmatch, ia, amatch, ib, bmatch = region
297
a_region = self.a[ia:amatch]
298
b_region = self.b[ib:bmatch]
299
matches = SequenceMatcher(None, a_region,
300
b_region).get_matching_blocks()
303
for region_ia, region_ib, region_len in matches[:-1]:
306
reg = self.mismatch_region(next_a, region_ia, next_b,
310
yield 'same', region_ia, region_len+region_ia
311
next_a = region_ia + region_len
312
next_b = region_ib + region_len
313
reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
319
def mismatch_region(next_a, region_ia, next_b, region_ib):
320
if next_a < region_ia or next_b < region_ib:
321
return 'conflict', None, None, next_a, region_ia, next_b, region_ib
324
def find_sync_regions(self):
325
"""Return a list of sync regions, where both descendents match the base.
327
Generates a list of (base1, base2, a1, a2, b1, b2). There is
328
always a zero-length sync region at the end of all the files.
332
amatches = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
333
bmatches = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
334
len_a = len(amatches)
335
len_b = len(bmatches)
339
while ia < len_a and ib < len_b:
340
abase, amatch, alen = amatches[ia]
341
bbase, bmatch, blen = bmatches[ib]
343
# there is an unconflicted block at i; how long does it
344
# extend? until whichever one ends earlier.
345
i = intersect((abase, abase+alen), (bbase, bbase+blen))
349
intlen = intend - intbase
351
# found a match of base[i[0], i[1]]; this may be less than
352
# the region that matches in either one
353
assert intlen <= alen
354
assert intlen <= blen
355
assert abase <= intbase
356
assert bbase <= intbase
358
asub = amatch + (intbase - abase)
359
bsub = bmatch + (intbase - bbase)
363
assert self.base[intbase:intend] == self.a[asub:aend], \
364
(self.base[intbase:intend], self.a[asub:aend])
366
assert self.base[intbase:intend] == self.b[bsub:bend]
368
sl.append((intbase, intend,
372
# advance whichever one ends first in the base text
373
if (abase + alen) < (bbase + blen):
378
intbase = len(self.base)
381
sl.append((intbase, intbase, abase, abase, bbase, bbase))
387
def find_unconflicted(self):
388
"""Return a list of ranges in base that are not conflicted."""
389
am = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
390
bm = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
395
# there is an unconflicted block at i; how long does it
396
# extend? until whichever one ends earlier.
401
i = intersect((a1, a2), (b1, b2))
414
# as for diff3 and meld the syntax is "MINE BASE OTHER"
415
a = file(argv[1], 'rt').readlines()
416
base = file(argv[2], 'rt').readlines()
417
b = file(argv[3], 'rt').readlines()
419
m3 = Merge3(base, a, b)
421
#for sr in m3.find_sync_regions():
424
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
425
sys.stdout.writelines(m3.merge_annotated())
428
if __name__ == '__main__':
430
sys.exit(main(sys.argv))