1
# Copyright (C) 2004, 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
22
from bzrlib.errors import CantReprocessAndShowBase
23
import bzrlib.patiencediff
24
from bzrlib.textfile import check_text_lines
27
def intersect(ra, rb):
28
"""Given two ranges return the range where they intersect or None.
30
>>> intersect((0, 10), (0, 6))
32
>>> intersect((0, 10), (5, 15))
34
>>> intersect((0, 10), (10, 15))
35
>>> intersect((0, 9), (10, 15))
36
>>> intersect((0, 9), (7, 15))
39
# preconditions: (ra[0] <= ra[1]) and (rb[0] <= rb[1])
41
sa = max(ra[0], rb[0])
42
sb = min(ra[1], rb[1])
49
def compare_range(a, astart, aend, b, bstart, bend):
50
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
52
if (aend-astart) != (bend-bstart):
54
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
64
"""3-way merge of texts.
66
Given BASE, OTHER, THIS, tries to produce a combined text
67
incorporating the changes from both BASE->OTHER and BASE->THIS.
68
All three will typically be sequences of lines."""
70
def __init__(self, base, a, b, is_cherrypick=False, allow_objects=False):
73
:param base: lines in BASE
76
:param is_cherrypick: flag indicating if this merge is a cherrypick.
77
When cherrypicking b => a, matches with b and base do not conflict.
78
:param allow_objects: if True, do not require that base, a and b are
79
plain Python strs. Also prevents BinaryFile from being raised.
80
Lines can be any sequence of comparable and hashable Python
84
check_text_lines(base)
90
self.is_cherrypick = is_cherrypick
96
start_marker='<<<<<<<',
101
"""Return merge in cvs-like form.
105
if self.a[0].endswith('\r\n'):
107
elif self.a[0].endswith('\r'):
109
if base_marker and reprocess:
110
raise CantReprocessAndShowBase()
112
start_marker = start_marker + ' ' + name_a
114
end_marker = end_marker + ' ' + name_b
115
if name_base and base_marker:
116
base_marker = base_marker + ' ' + name_base
117
merge_regions = self.merge_regions()
118
if reprocess is True:
119
merge_regions = self.reprocess_merge_regions(merge_regions)
120
for t in merge_regions:
122
if what == 'unchanged':
123
for i in range(t[1], t[2]):
125
elif what == 'a' or what == 'same':
126
for i in range(t[1], t[2]):
129
for i in range(t[1], t[2]):
131
elif what == 'conflict':
132
yield start_marker + newline
133
for i in range(t[3], t[4]):
135
if base_marker is not None:
136
yield base_marker + newline
137
for i in range(t[1], t[2]):
139
yield mid_marker + newline
140
for i in range(t[5], t[6]):
142
yield end_marker + newline
144
raise ValueError(what)
146
def merge_annotated(self):
147
"""Return merge with conflicts, showing origin of lines.
149
Most useful for debugging merge.
151
for t in self.merge_regions():
153
if what == 'unchanged':
154
for i in range(t[1], t[2]):
155
yield 'u | ' + self.base[i]
156
elif what == 'a' or what == 'same':
157
for i in range(t[1], t[2]):
158
yield what[0] + ' | ' + self.a[i]
160
for i in range(t[1], t[2]):
161
yield 'b | ' + self.b[i]
162
elif what == 'conflict':
164
for i in range(t[3], t[4]):
165
yield 'A | ' + self.a[i]
167
for i in range(t[5], t[6]):
168
yield 'B | ' + self.b[i]
171
raise ValueError(what)
173
def merge_groups(self):
174
"""Yield sequence of line groups. Each one is a tuple:
177
Lines unchanged from base
183
Lines taken from a (and equal to b)
188
'conflict', base_lines, a_lines, b_lines
189
Lines from base were changed to either a or b and conflict.
191
for t in self.merge_regions():
193
if what == 'unchanged':
194
yield what, self.base[t[1]:t[2]]
195
elif what == 'a' or what == 'same':
196
yield what, self.a[t[1]:t[2]]
198
yield what, self.b[t[1]:t[2]]
199
elif what == 'conflict':
201
self.base[t[1]:t[2]],
205
raise ValueError(what)
207
def merge_regions(self):
208
"""Return sequences of matching and conflicting regions.
210
This returns tuples, where the first value says what kind we
213
'unchanged', start, end
214
Take a region of base[start:end]
217
b and a are different from base but give the same result
220
Non-clashing insertion from a[start:end]
222
Method is as follows:
224
The two sequences align only on regions which match the base
225
and both descendents. These are found by doing a two-way diff
226
of each one against the base, and then finding the
227
intersections between those regions. These "sync regions"
228
are by definition unchanged in both and easily dealt with.
230
The regions in between can be in any of three cases:
231
conflicted, or changed on only one side.
234
# section a[0:ia] has been disposed of, etc
237
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
238
matchlen = zend - zmatch
241
# matchlen == (aend - amatch)
242
# matchlen == (bend - bmatch)
245
len_base = zmatch - iz
249
# assert len_base >= 0
251
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
254
# try to avoid actually slicing the lists
255
same = compare_range(self.a, ia, amatch,
259
yield 'same', ia, amatch
261
equal_a = compare_range(self.a, ia, amatch,
262
self.base, iz, zmatch)
263
equal_b = compare_range(self.b, ib, bmatch,
264
self.base, iz, zmatch)
265
if equal_a and not equal_b:
266
yield 'b', ib, bmatch
267
elif equal_b and not equal_a:
268
yield 'a', ia, amatch
269
elif not equal_a and not equal_b:
270
if self.is_cherrypick:
271
for node in self._refine_cherrypick_conflict(
272
iz, zmatch, ia, amatch,
276
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
278
raise AssertionError("can't handle a=b=base but unmatched")
284
# if the same part of the base was deleted on both sides
285
# that's OK, we can just skip it.
289
# assert ia == amatch
290
# assert ib == bmatch
291
# assert iz == zmatch
293
yield 'unchanged', zmatch, zend
298
def _refine_cherrypick_conflict(self, zstart, zend, astart, aend, bstart, bend):
299
"""When cherrypicking b => a, ignore matches with b and base."""
300
# Do not emit regions which match, only regions which do not match
301
matches = bzrlib.patiencediff.PatienceSequenceMatcher(None,
302
self.base[zstart:zend], self.b[bstart:bend]).get_matching_blocks()
307
for base_idx, b_idx, match_len in matches:
308
conflict_z_len = base_idx - last_base_idx
309
conflict_b_len = b_idx - last_b_idx
310
if conflict_b_len == 0: # There are no lines in b which conflict,
316
zstart + last_base_idx, zstart + base_idx,
317
aend, aend, bstart + last_b_idx, bstart + b_idx)
319
# The first conflict gets the a-range
321
yield ('conflict', zstart + last_base_idx, zstart +
323
astart, aend, bstart + last_b_idx, bstart + b_idx)
324
last_base_idx = base_idx + match_len
325
last_b_idx = b_idx + match_len
326
if last_base_idx != zend - zstart or last_b_idx != bend - bstart:
328
yield ('conflict', zstart + last_base_idx, zstart + base_idx,
329
aend, aend, bstart + last_b_idx, bstart + b_idx)
331
# The first conflict gets the a-range
333
yield ('conflict', zstart + last_base_idx, zstart + base_idx,
334
astart, aend, bstart + last_b_idx, bstart + b_idx)
336
yield ('conflict', zstart, zend, astart, aend, bstart, bend)
338
def reprocess_merge_regions(self, merge_regions):
339
"""Where there are conflict regions, remove the agreed lines.
341
Lines where both A and B have made the same changes are
344
for region in merge_regions:
345
if region[0] != "conflict":
348
type, iz, zmatch, ia, amatch, ib, bmatch = region
349
a_region = self.a[ia:amatch]
350
b_region = self.b[ib:bmatch]
351
matches = bzrlib.patiencediff.PatienceSequenceMatcher(
352
None, a_region, b_region).get_matching_blocks()
355
for region_ia, region_ib, region_len in matches[:-1]:
358
reg = self.mismatch_region(next_a, region_ia, next_b,
362
yield 'same', region_ia, region_len+region_ia
363
next_a = region_ia + region_len
364
next_b = region_ib + region_len
365
reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
370
def mismatch_region(next_a, region_ia, next_b, region_ib):
371
if next_a < region_ia or next_b < region_ib:
372
return 'conflict', None, None, next_a, region_ia, next_b, region_ib
374
def find_sync_regions(self):
375
"""Return a list of sync regions, where both descendents match the base.
377
Generates a list of (base1, base2, a1, a2, b1, b2). There is
378
always a zero-length sync region at the end of all the files.
382
amatches = bzrlib.patiencediff.PatienceSequenceMatcher(
383
None, self.base, self.a).get_matching_blocks()
384
bmatches = bzrlib.patiencediff.PatienceSequenceMatcher(
385
None, self.base, self.b).get_matching_blocks()
386
len_a = len(amatches)
387
len_b = len(bmatches)
391
while ia < len_a and ib < len_b:
392
abase, amatch, alen = amatches[ia]
393
bbase, bmatch, blen = bmatches[ib]
395
# there is an unconflicted block at i; how long does it
396
# extend? until whichever one ends earlier.
397
i = intersect((abase, abase+alen), (bbase, bbase+blen))
401
intlen = intend - intbase
403
# found a match of base[i[0], i[1]]; this may be less than
404
# the region that matches in either one
405
# assert intlen <= alen
406
# assert intlen <= blen
407
# assert abase <= intbase
408
# assert bbase <= intbase
410
asub = amatch + (intbase - abase)
411
bsub = bmatch + (intbase - bbase)
415
# assert self.base[intbase:intend] == self.a[asub:aend], \
416
# (self.base[intbase:intend], self.a[asub:aend])
417
# assert self.base[intbase:intend] == self.b[bsub:bend]
419
sl.append((intbase, intend,
422
# advance whichever one ends first in the base text
423
if (abase + alen) < (bbase + blen):
428
intbase = len(self.base)
431
sl.append((intbase, intbase, abase, abase, bbase, bbase))
435
def find_unconflicted(self):
436
"""Return a list of ranges in base that are not conflicted."""
437
am = bzrlib.patiencediff.PatienceSequenceMatcher(
438
None, self.base, self.a).get_matching_blocks()
439
bm = bzrlib.patiencediff.PatienceSequenceMatcher(
440
None, self.base, self.b).get_matching_blocks()
445
# there is an unconflicted block at i; how long does it
446
# extend? until whichever one ends earlier.
451
i = intersect((a1, a2), (b1, b2))
464
# as for diff3 and meld the syntax is "MINE BASE OTHER"
465
a = file(argv[1], 'rt').readlines()
466
base = file(argv[2], 'rt').readlines()
467
b = file(argv[3], 'rt').readlines()
469
m3 = Merge3(base, a, b)
471
#for sr in m3.find_sync_regions():
474
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
475
sys.stdout.writelines(m3.merge_annotated())
478
if __name__ == '__main__':
480
sys.exit(main(sys.argv))