1
# Copyright (C) 2004, 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
22
from bzrlib.errors import CantReprocessAndShowBase
23
import bzrlib.patiencediff
24
from bzrlib.textfile import check_text_lines
27
def intersect(ra, rb):
28
"""Given two ranges return the range where they intersect or None.
30
>>> intersect((0, 10), (0, 6))
32
>>> intersect((0, 10), (5, 15))
34
>>> intersect((0, 10), (10, 15))
35
>>> intersect((0, 9), (10, 15))
36
>>> intersect((0, 9), (7, 15))
42
sa = max(ra[0], rb[0])
43
sb = min(ra[1], rb[1])
50
def compare_range(a, astart, aend, b, bstart, bend):
51
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
53
if (aend-astart) != (bend-bstart):
55
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
65
"""3-way merge of texts.
67
Given BASE, OTHER, THIS, tries to produce a combined text
68
incorporating the changes from both BASE->OTHER and BASE->THIS.
69
All three will typically be sequences of lines."""
70
def __init__(self, base, a, b, is_cherrypick=False):
71
check_text_lines(base)
77
self.is_cherrypick = is_cherrypick
83
start_marker='<<<<<<<',
88
"""Return merge in cvs-like form.
92
if self.a[0].endswith('\r\n'):
94
elif self.a[0].endswith('\r'):
96
if base_marker and reprocess:
97
raise CantReprocessAndShowBase()
99
start_marker = start_marker + ' ' + name_a
101
end_marker = end_marker + ' ' + name_b
102
if name_base and base_marker:
103
base_marker = base_marker + ' ' + name_base
104
merge_regions = self.merge_regions()
105
if reprocess is True:
106
merge_regions = self.reprocess_merge_regions(merge_regions)
107
for t in merge_regions:
109
if what == 'unchanged':
110
for i in range(t[1], t[2]):
112
elif what == 'a' or what == 'same':
113
for i in range(t[1], t[2]):
116
for i in range(t[1], t[2]):
118
elif what == 'conflict':
119
yield start_marker + newline
120
for i in range(t[3], t[4]):
122
if base_marker is not None:
123
yield base_marker + newline
124
for i in range(t[1], t[2]):
126
yield mid_marker + newline
127
for i in range(t[5], t[6]):
129
yield end_marker + newline
131
raise ValueError(what)
133
def merge_annotated(self):
134
"""Return merge with conflicts, showing origin of lines.
136
Most useful for debugging merge.
138
for t in self.merge_regions():
140
if what == 'unchanged':
141
for i in range(t[1], t[2]):
142
yield 'u | ' + self.base[i]
143
elif what == 'a' or what == 'same':
144
for i in range(t[1], t[2]):
145
yield what[0] + ' | ' + self.a[i]
147
for i in range(t[1], t[2]):
148
yield 'b | ' + self.b[i]
149
elif what == 'conflict':
151
for i in range(t[3], t[4]):
152
yield 'A | ' + self.a[i]
154
for i in range(t[5], t[6]):
155
yield 'B | ' + self.b[i]
158
raise ValueError(what)
160
def merge_groups(self):
161
"""Yield sequence of line groups. Each one is a tuple:
164
Lines unchanged from base
170
Lines taken from a (and equal to b)
175
'conflict', base_lines, a_lines, b_lines
176
Lines from base were changed to either a or b and conflict.
178
for t in self.merge_regions():
180
if what == 'unchanged':
181
yield what, self.base[t[1]:t[2]]
182
elif what == 'a' or what == 'same':
183
yield what, self.a[t[1]:t[2]]
185
yield what, self.b[t[1]:t[2]]
186
elif what == 'conflict':
188
self.base[t[1]:t[2]],
192
raise ValueError(what)
194
def merge_regions(self):
195
"""Return sequences of matching and conflicting regions.
197
This returns tuples, where the first value says what kind we
200
'unchanged', start, end
201
Take a region of base[start:end]
204
b and a are different from base but give the same result
207
Non-clashing insertion from a[start:end]
209
Method is as follows:
211
The two sequences align only on regions which match the base
212
and both descendents. These are found by doing a two-way diff
213
of each one against the base, and then finding the
214
intersections between those regions. These "sync regions"
215
are by definition unchanged in both and easily dealt with.
217
The regions in between can be in any of three cases:
218
conflicted, or changed on only one side.
221
# section a[0:ia] has been disposed of, etc
224
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
225
#print 'match base [%d:%d]' % (zmatch, zend)
227
matchlen = zend - zmatch
229
assert matchlen == (aend - amatch)
230
assert matchlen == (bend - bmatch)
234
len_base = zmatch - iz
239
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
242
# try to avoid actually slicing the lists
243
same = compare_range(self.a, ia, amatch,
247
yield 'same', ia, amatch
249
equal_a = compare_range(self.a, ia, amatch,
250
self.base, iz, zmatch)
251
equal_b = compare_range(self.b, ib, bmatch,
252
self.base, iz, zmatch)
253
if equal_a and not equal_b:
254
yield 'b', ib, bmatch
255
elif equal_b and not equal_a:
256
yield 'a', ia, amatch
257
elif not equal_a and not equal_b:
258
if self.is_cherrypick:
259
for node in self._refine_cherrypick_conflict(
260
iz, zmatch, ia, amatch,
264
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
266
raise AssertionError("can't handle a=b=base but unmatched")
272
# if the same part of the base was deleted on both sides
273
# that's OK, we can just skip it.
280
yield 'unchanged', zmatch, zend
285
def _refine_cherrypick_conflict(self, zstart, zend, astart, aend, bstart, bend):
286
"""When cherrypicking b => a, ignore matches with b and base."""
287
# Do not emit regions which match, only regions which do not match
288
matches = bzrlib.patiencediff.PatienceSequenceMatcher(None,
289
self.base[zstart:zend], self.b[bstart:bend]).get_matching_blocks()
293
for region_iz, region_ib, region_len in matches:
294
conflict_z_len = region_iz - zzcur
295
conflict_b_len = region_ib - bbcur
296
if conflict_b_len == 0: # There are no lines in b which conflict,
301
yield ('conflict', zstart + zzcur, zstart + region_iz,
302
aend, aend, bstart + bbcur, bstart + region_ib)
304
# The first conflict gets the a-range
306
yield ('conflict', zstart + zzcur, zstart + region_iz,
307
astart, aend, bstart + bbcur, bstart + region_ib)
308
zzcur = region_iz + region_len
309
bbcur = region_ib + region_len
310
if zzcur != zend - zstart or bbcur != bend - bstart:
312
yield ('conflict', zstart + zzcur, zstart + region_iz,
313
aend, aend, bstart + bbcur, bstart + region_ib)
315
# The first conflict gets the a-range
317
yield ('conflict', zstart + zzcur, zstart + region_iz,
318
astart, aend, bstart + bbcur, bstart + region_ib)
320
yield ('conflict', zstart, zend, astart, aend, bstart, bend)
322
def reprocess_merge_regions(self, merge_regions):
323
"""Where there are conflict regions, remove the agreed lines.
325
Lines where both A and B have made the same changes are
328
for region in merge_regions:
329
if region[0] != "conflict":
332
type, iz, zmatch, ia, amatch, ib, bmatch = region
333
a_region = self.a[ia:amatch]
334
b_region = self.b[ib:bmatch]
335
matches = bzrlib.patiencediff.PatienceSequenceMatcher(
336
None, a_region, b_region).get_matching_blocks()
339
for region_ia, region_ib, region_len in matches[:-1]:
342
reg = self.mismatch_region(next_a, region_ia, next_b,
346
yield 'same', region_ia, region_len+region_ia
347
next_a = region_ia + region_len
348
next_b = region_ib + region_len
349
reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
354
def mismatch_region(next_a, region_ia, next_b, region_ib):
355
if next_a < region_ia or next_b < region_ib:
356
return 'conflict', None, None, next_a, region_ia, next_b, region_ib
358
def find_sync_regions(self):
359
"""Return a list of sync regions, where both descendents match the base.
361
Generates a list of (base1, base2, a1, a2, b1, b2). There is
362
always a zero-length sync region at the end of all the files.
366
amatches = bzrlib.patiencediff.PatienceSequenceMatcher(
367
None, self.base, self.a).get_matching_blocks()
368
bmatches = bzrlib.patiencediff.PatienceSequenceMatcher(
369
None, self.base, self.b).get_matching_blocks()
370
len_a = len(amatches)
371
len_b = len(bmatches)
375
while ia < len_a and ib < len_b:
376
abase, amatch, alen = amatches[ia]
377
bbase, bmatch, blen = bmatches[ib]
379
# there is an unconflicted block at i; how long does it
380
# extend? until whichever one ends earlier.
381
i = intersect((abase, abase+alen), (bbase, bbase+blen))
385
intlen = intend - intbase
387
# found a match of base[i[0], i[1]]; this may be less than
388
# the region that matches in either one
389
assert intlen <= alen
390
assert intlen <= blen
391
assert abase <= intbase
392
assert bbase <= intbase
394
asub = amatch + (intbase - abase)
395
bsub = bmatch + (intbase - bbase)
399
# XXX: How much overhead is involved in slicing all of these
400
# and doing an extra comparison
401
assert self.base[intbase:intend] == self.a[asub:aend], \
402
(self.base[intbase:intend], self.a[asub:aend])
404
assert self.base[intbase:intend] == self.b[bsub:bend]
406
sl.append((intbase, intend,
410
# advance whichever one ends first in the base text
411
if (abase + alen) < (bbase + blen):
416
intbase = len(self.base)
419
sl.append((intbase, intbase, abase, abase, bbase, bbase))
423
def find_unconflicted(self):
424
"""Return a list of ranges in base that are not conflicted."""
425
am = bzrlib.patiencediff.PatienceSequenceMatcher(
426
None, self.base, self.a).get_matching_blocks()
427
bm = bzrlib.patiencediff.PatienceSequenceMatcher(
428
None, self.base, self.b).get_matching_blocks()
433
# there is an unconflicted block at i; how long does it
434
# extend? until whichever one ends earlier.
439
i = intersect((a1, a2), (b1, b2))
452
# as for diff3 and meld the syntax is "MINE BASE OTHER"
453
a = file(argv[1], 'rt').readlines()
454
base = file(argv[2], 'rt').readlines()
455
b = file(argv[3], 'rt').readlines()
457
m3 = Merge3(base, a, b)
459
#for sr in m3.find_sync_regions():
462
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
463
sys.stdout.writelines(m3.merge_annotated())
466
if __name__ == '__main__':
468
sys.exit(main(sys.argv))