1
# Copyright (C) 2005-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
19
# mbp: "you know that thing where cvs gives you conflict markers?"
29
class CantReprocessAndShowBase(errors.BzrError):
31
_fmt = ("Can't reprocess and show base, because reprocessing obscures "
32
"the relationship of conflicting lines to the base")
35
def intersect(ra, rb):
36
"""Given two ranges return the range where they intersect or None.
38
>>> intersect((0, 10), (0, 6))
40
>>> intersect((0, 10), (5, 15))
42
>>> intersect((0, 10), (10, 15))
43
>>> intersect((0, 9), (10, 15))
44
>>> intersect((0, 9), (7, 15))
47
# preconditions: (ra[0] <= ra[1]) and (rb[0] <= rb[1])
49
sa = max(ra[0], rb[0])
50
sb = min(ra[1], rb[1])
57
def compare_range(a, astart, aend, b, bstart, bend):
58
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
60
if (aend-astart) != (bend-bstart):
62
for ia, ib in zip(range(astart, aend), range(bstart, bend)):
70
"""3-way merge of texts.
72
Given BASE, OTHER, THIS, tries to produce a combined text
73
incorporating the changes from both BASE->OTHER and BASE->THIS.
74
All three will typically be sequences of lines."""
76
def __init__(self, base, a, b, is_cherrypick=False, allow_objects=False):
79
:param base: lines in BASE
82
:param is_cherrypick: flag indicating if this merge is a cherrypick.
83
When cherrypicking b => a, matches with b and base do not conflict.
84
:param allow_objects: if True, do not require that base, a and b are
85
plain Python strs. Also prevents BinaryFile from being raised.
86
Lines can be any sequence of comparable and hashable Python
90
textfile.check_text_lines(base)
91
textfile.check_text_lines(a)
92
textfile.check_text_lines(b)
96
self.is_cherrypick = is_cherrypick
102
start_marker=b'<<<<<<<',
103
mid_marker=b'=======',
104
end_marker=b'>>>>>>>',
107
"""Return merge in cvs-like form.
111
if self.a[0].endswith(b'\r\n'):
113
elif self.a[0].endswith(b'\r'):
115
if base_marker and reprocess:
116
raise CantReprocessAndShowBase()
118
start_marker = start_marker + b' ' + name_a
120
end_marker = end_marker + b' ' + name_b
121
if name_base and base_marker:
122
base_marker = base_marker + b' ' + name_base
123
merge_regions = self.merge_regions()
124
if reprocess is True:
125
merge_regions = self.reprocess_merge_regions(merge_regions)
126
for t in merge_regions:
128
if what == 'unchanged':
129
for i in range(t[1], t[2]):
131
elif what == 'a' or what == 'same':
132
for i in range(t[1], t[2]):
135
for i in range(t[1], t[2]):
137
elif what == 'conflict':
138
yield start_marker + newline
139
for i in range(t[3], t[4]):
141
if base_marker is not None:
142
yield base_marker + newline
143
for i in range(t[1], t[2]):
145
yield mid_marker + newline
146
for i in range(t[5], t[6]):
148
yield end_marker + newline
150
raise ValueError(what)
152
def merge_annotated(self):
153
"""Return merge with conflicts, showing origin of lines.
155
Most useful for debugging merge.
157
for t in self.merge_regions():
159
if what == 'unchanged':
160
for i in range(t[1], t[2]):
161
yield 'u | ' + self.base[i]
162
elif what == 'a' or what == 'same':
163
for i in range(t[1], t[2]):
164
yield what[0] + ' | ' + self.a[i]
166
for i in range(t[1], t[2]):
167
yield 'b | ' + self.b[i]
168
elif what == 'conflict':
170
for i in range(t[3], t[4]):
171
yield 'A | ' + self.a[i]
173
for i in range(t[5], t[6]):
174
yield 'B | ' + self.b[i]
177
raise ValueError(what)
179
def merge_groups(self):
180
"""Yield sequence of line groups. Each one is a tuple:
183
Lines unchanged from base
189
Lines taken from a (and equal to b)
194
'conflict', base_lines, a_lines, b_lines
195
Lines from base were changed to either a or b and conflict.
197
for t in self.merge_regions():
199
if what == 'unchanged':
200
yield what, self.base[t[1]:t[2]]
201
elif what == 'a' or what == 'same':
202
yield what, self.a[t[1]:t[2]]
204
yield what, self.b[t[1]:t[2]]
205
elif what == 'conflict':
207
self.base[t[1]:t[2]],
211
raise ValueError(what)
213
def merge_regions(self):
214
"""Return sequences of matching and conflicting regions.
216
This returns tuples, where the first value says what kind we
219
'unchanged', start, end
220
Take a region of base[start:end]
223
b and a are different from base but give the same result
226
Non-clashing insertion from a[start:end]
228
Method is as follows:
230
The two sequences align only on regions which match the base
231
and both descendents. These are found by doing a two-way diff
232
of each one against the base, and then finding the
233
intersections between those regions. These "sync regions"
234
are by definition unchanged in both and easily dealt with.
236
The regions in between can be in any of three cases:
237
conflicted, or changed on only one side.
240
# section a[0:ia] has been disposed of, etc
243
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
244
matchlen = zend - zmatch
247
# matchlen == (aend - amatch)
248
# matchlen == (bend - bmatch)
251
len_base = zmatch - iz
255
# assert len_base >= 0
257
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
260
# try to avoid actually slicing the lists
261
same = compare_range(self.a, ia, amatch,
265
yield 'same', ia, amatch
267
equal_a = compare_range(self.a, ia, amatch,
268
self.base, iz, zmatch)
269
equal_b = compare_range(self.b, ib, bmatch,
270
self.base, iz, zmatch)
271
if equal_a and not equal_b:
272
yield 'b', ib, bmatch
273
elif equal_b and not equal_a:
274
yield 'a', ia, amatch
275
elif not equal_a and not equal_b:
276
if self.is_cherrypick:
277
for node in self._refine_cherrypick_conflict(
278
iz, zmatch, ia, amatch,
282
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
284
raise AssertionError("can't handle a=b=base but unmatched")
290
# if the same part of the base was deleted on both sides
291
# that's OK, we can just skip it.
295
# assert ia == amatch
296
# assert ib == bmatch
297
# assert iz == zmatch
299
yield 'unchanged', zmatch, zend
304
def _refine_cherrypick_conflict(self, zstart, zend, astart, aend, bstart, bend):
305
"""When cherrypicking b => a, ignore matches with b and base."""
306
# Do not emit regions which match, only regions which do not match
307
matches = patiencediff.PatienceSequenceMatcher(None,
308
self.base[zstart:zend], self.b[bstart:bend]).get_matching_blocks()
313
for base_idx, b_idx, match_len in matches:
314
conflict_z_len = base_idx - last_base_idx
315
conflict_b_len = b_idx - last_b_idx
316
if conflict_b_len == 0: # There are no lines in b which conflict,
322
zstart + last_base_idx, zstart + base_idx,
323
aend, aend, bstart + last_b_idx, bstart + b_idx)
325
# The first conflict gets the a-range
327
yield ('conflict', zstart + last_base_idx, zstart +
329
astart, aend, bstart + last_b_idx, bstart + b_idx)
330
last_base_idx = base_idx + match_len
331
last_b_idx = b_idx + match_len
332
if last_base_idx != zend - zstart or last_b_idx != bend - bstart:
334
yield ('conflict', zstart + last_base_idx, zstart + base_idx,
335
aend, aend, bstart + last_b_idx, bstart + b_idx)
337
# The first conflict gets the a-range
339
yield ('conflict', zstart + last_base_idx, zstart + base_idx,
340
astart, aend, bstart + last_b_idx, bstart + b_idx)
342
yield ('conflict', zstart, zend, astart, aend, bstart, bend)
344
def reprocess_merge_regions(self, merge_regions):
345
"""Where there are conflict regions, remove the agreed lines.
347
Lines where both A and B have made the same changes are
350
for region in merge_regions:
351
if region[0] != "conflict":
354
type, iz, zmatch, ia, amatch, ib, bmatch = region
355
a_region = self.a[ia:amatch]
356
b_region = self.b[ib:bmatch]
357
matches = patiencediff.PatienceSequenceMatcher(
358
None, a_region, b_region).get_matching_blocks()
361
for region_ia, region_ib, region_len in matches[:-1]:
364
reg = self.mismatch_region(next_a, region_ia, next_b,
368
yield 'same', region_ia, region_len+region_ia
369
next_a = region_ia + region_len
370
next_b = region_ib + region_len
371
reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
376
def mismatch_region(next_a, region_ia, next_b, region_ib):
377
if next_a < region_ia or next_b < region_ib:
378
return 'conflict', None, None, next_a, region_ia, next_b, region_ib
380
def find_sync_regions(self):
381
"""Return a list of sync regions, where both descendents match the base.
383
Generates a list of (base1, base2, a1, a2, b1, b2). There is
384
always a zero-length sync region at the end of all the files.
388
amatches = patiencediff.PatienceSequenceMatcher(
389
None, self.base, self.a).get_matching_blocks()
390
bmatches = patiencediff.PatienceSequenceMatcher(
391
None, self.base, self.b).get_matching_blocks()
392
len_a = len(amatches)
393
len_b = len(bmatches)
397
while ia < len_a and ib < len_b:
398
abase, amatch, alen = amatches[ia]
399
bbase, bmatch, blen = bmatches[ib]
401
# there is an unconflicted block at i; how long does it
402
# extend? until whichever one ends earlier.
403
i = intersect((abase, abase+alen), (bbase, bbase+blen))
407
intlen = intend - intbase
409
# found a match of base[i[0], i[1]]; this may be less than
410
# the region that matches in either one
411
# assert intlen <= alen
412
# assert intlen <= blen
413
# assert abase <= intbase
414
# assert bbase <= intbase
416
asub = amatch + (intbase - abase)
417
bsub = bmatch + (intbase - bbase)
421
# assert self.base[intbase:intend] == self.a[asub:aend], \
422
# (self.base[intbase:intend], self.a[asub:aend])
423
# assert self.base[intbase:intend] == self.b[bsub:bend]
425
sl.append((intbase, intend,
428
# advance whichever one ends first in the base text
429
if (abase + alen) < (bbase + blen):
434
intbase = len(self.base)
437
sl.append((intbase, intbase, abase, abase, bbase, bbase))
441
def find_unconflicted(self):
442
"""Return a list of ranges in base that are not conflicted."""
443
am = patiencediff.PatienceSequenceMatcher(
444
None, self.base, self.a).get_matching_blocks()
445
bm = patiencediff.PatienceSequenceMatcher(
446
None, self.base, self.b).get_matching_blocks()
451
# there is an unconflicted block at i; how long does it
452
# extend? until whichever one ends earlier.
457
i = intersect((a1, a2), (b1, b2))
470
# as for diff3 and meld the syntax is "MINE BASE OTHER"
471
with open(argv[1], 'rt') as f:
473
with open(argv[2], 'rt') as f:
475
with open(argv[3], 'rt') as f:
478
m3 = Merge3(base, a, b)
480
#for sr in m3.find_sync_regions():
483
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
484
sys.stdout.writelines(m3.merge_annotated())
487
if __name__ == '__main__':
489
sys.exit(main(sys.argv))