1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
1
# Copyright (C) 2004 - 2006 Aaron Bentley
2
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
28
binary_files_re = b'Binary files (.*) and (.*) differ\n'
31
class PatchSyntax(BzrError):
    """Root of the exception family raised for malformed patch text."""
35
class BinaryFiles(BzrError):
    """Raised when a "Binary files ... differ" section is encountered.

    The two names passed to the constructor are stored on the instance as
    ``orig_name`` and ``mod_name`` so that a handler can read them back.
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        # Only the two names are recorded; the base initializer is not
        # invoked here.
        self.orig_name, self.mod_name = orig_name, mod_name
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
class PatchSyntax(Exception):
    """Base exception for patch text that cannot be parsed.

    :param msg: human-readable description of the syntax problem; it becomes
        the exception's message
    """

    def __init__(self, msg):
        super(PatchSyntax, self).__init__(msg)
44
24
class MalformedPatchHeader(PatchSyntax):
46
_fmt = "Malformed patch header. %(desc)s\n%(line)r"
48
def __init__(self, desc, line):
25
def __init__(self, desc, line):
28
msg = "Malformed patch header. %s\n%r" % (self.desc, self.line)
29
PatchSyntax.__init__(self, msg)
32
class MalformedHunkHeader(PatchSyntax):
33
def __init__(self, desc, line):
36
msg = "Malformed hunk header. %s\n%r" % (self.desc, self.line)
37
PatchSyntax.__init__(self, msg)
53
40
class MalformedLine(PatchSyntax):
55
_fmt = "Malformed line. %(desc)s\n%(line)r"
57
41
def __init__(self, desc, line):
62
class PatchConflict(BzrError):
64
_fmt = ('Text contents mismatch at line %(line_no)d. Original has '
65
'"%(orig_line)s", but patch says it should be "%(patch_line)s"')
44
msg = "Malformed line. %s\n%s" % (self.desc, self.line)
45
PatchSyntax.__init__(self, msg)
48
class PatchConflict(Exception):
67
49
def __init__(self, line_no, orig_line, patch_line):
68
self.line_no = line_no
69
self.orig_line = orig_line.rstrip('\n')
70
self.patch_line = patch_line.rstrip('\n')
73
class MalformedHunkHeader(PatchSyntax):
75
_fmt = "Malformed hunk header. %(desc)s\n%(line)r"
77
def __init__(self, desc, line):
50
orig = orig_line.rstrip('\n')
51
patch = str(patch_line).rstrip('\n')
52
msg = 'Text contents mismatch at line %d. Original has "%s",'\
53
' but patch says it should be "%s"' % (line_no, orig, patch)
54
Exception.__init__(self, msg)
82
57
def get_patch_names(iter_lines):
83
line = next(iter_lines)
85
match = re.match(binary_files_re, line)
87
raise BinaryFiles(match.group(1), match.group(2))
88
if not line.startswith(b"--- "):
59
line = iter_lines.next()
60
if not line.startswith("--- "):
89
61
raise MalformedPatchHeader("No orig name", line)
91
orig_name = line[4:].rstrip(b"\n")
63
orig_name = line[4:].rstrip("\n")
92
64
except StopIteration:
93
65
raise MalformedPatchHeader("No orig line", "")
95
line = next(iter_lines)
96
if not line.startswith(b"+++ "):
67
line = iter_lines.next()
68
if not line.startswith("+++ "):
97
69
raise PatchSyntax("No mod name")
99
mod_name = line[4:].rstrip(b"\n")
71
mod_name = line[4:].rstrip("\n")
100
72
except StopIteration:
101
73
raise MalformedPatchHeader("No mod line", "")
102
74
return (orig_name, mod_name)
110
82
:return: the position and range, as a tuple
111
83
:rtype: (int, int)
113
tmp = textrange.split(b',')
85
tmp = textrange.split(',')
118
90
(pos, range) = tmp
120
92
range = int(range)
121
93
return (pos, range)
124
96
def hunk_from_header(line):
126
matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
128
raise MalformedHunkHeader("Does not match format.", line)
97
if not line.startswith("@@") or not line.endswith("@@\n") \
99
raise MalformedHunkHeader("Does not start and end with @@.", line)
130
(orig, mod) = matches.group(1).split(b" ")
131
except (ValueError, IndexError) as e:
101
(orig, mod) = line[3:-4].split(" ")
132
103
raise MalformedHunkHeader(str(e), line)
133
if not orig.startswith(b'-') or not mod.startswith(b'+'):
104
if not orig.startswith('-') or not mod.startswith('+'):
134
105
raise MalformedHunkHeader("Positions don't start with + or -.", line)
136
107
(orig_pos, orig_range) = parse_range(orig[1:])
137
108
(mod_pos, mod_range) = parse_range(mod[1:])
138
except (ValueError, IndexError) as e:
139
110
raise MalformedHunkHeader(str(e), line)
140
111
if mod_range < 0 or orig_range < 0:
141
112
raise MalformedHunkHeader("Hunk range is negative", line)
142
tail = matches.group(3)
143
return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
146
class HunkLine(object):
113
return Hunk(orig_pos, orig_range, mod_pos, mod_range)
148
117
def __init__(self, contents):
149
118
self.contents = contents
151
120
def get_str(self, leadchar):
152
if self.contents == b"\n" and leadchar == b" " and False:
154
if not self.contents.endswith(b'\n'):
155
terminator = b'\n' + NO_NL
121
if self.contents == "\n" and leadchar == " " and False:
123
if not self.contents.endswith('\n'):
124
terminator = '\n' + NO_NL
158
127
return leadchar + self.contents + terminator
161
raise NotImplementedError
164
130
class ContextLine(HunkLine):
166
131
def __init__(self, contents):
167
132
HunkLine.__init__(self, contents)
170
return self.get_str(b" ")
135
return self.get_str(" ")
173
138
class InsertLine(HunkLine):
174
139
def __init__(self, contents):
175
140
HunkLine.__init__(self, contents)
178
return self.get_str(b"+")
143
return self.get_str("+")
181
146
class RemoveLine(HunkLine):
182
147
def __init__(self, contents):
183
148
HunkLine.__init__(self, contents)
186
return self.get_str(b"-")
189
NO_NL = b'\\ No newline at end of file\n'
190
__pychecker__ = "no-returnvalues"
151
return self.get_str("-")
153
NO_NL = '\\ No newline at end of file\n'
154
__pychecker__="no-returnvalues"
193
156
def parse_line(line):
194
if line.startswith(b"\n"):
157
if line.startswith("\n"):
195
158
return ContextLine(line)
196
elif line.startswith(b" "):
159
elif line.startswith(" "):
197
160
return ContextLine(line[1:])
198
elif line.startswith(b"+"):
161
elif line.startswith("+"):
199
162
return InsertLine(line[1:])
200
elif line.startswith(b"-"):
163
elif line.startswith("-"):
201
164
return RemoveLine(line[1:])
203
168
raise MalformedLine("Unknown line type", line)
211
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
173
def __init__(self, orig_pos, orig_range, mod_pos, mod_range):
212
174
self.orig_pos = orig_pos
213
175
self.orig_range = orig_range
214
176
self.mod_pos = mod_pos
215
177
self.mod_range = mod_range
219
180
def get_header(self):
220
if self.tail is None:
223
tail_str = b' ' + self.tail
224
return b"@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
226
self.range_str(self.mod_pos,
181
return "@@ -%s +%s @@\n" % (self.range_str(self.orig_pos,
183
self.range_str(self.mod_pos,
230
186
def range_str(self, pos, range):
231
187
"""Return a file range, special-casing for 1-line files.
237
193
:return: a string in the format 1,4 except when range == pos == 1
242
return b"%i,%i" % (pos, range)
198
return "%i,%i" % (pos, range)
245
201
lines = [self.get_header()]
246
202
for line in self.lines:
247
lines.append(line.as_bytes())
248
return b"".join(lines)
203
lines.append(str(line))
204
return "".join(lines)
252
206
def shift_to_mod(self, pos):
253
if pos < self.orig_pos - 1:
207
if pos < self.orig_pos-1:
255
elif pos > self.orig_pos + self.orig_range:
209
elif pos > self.orig_pos+self.orig_range:
256
210
return self.mod_range - self.orig_range
258
212
return self.shift_to_mod_lines(pos)
260
214
def shift_to_mod_lines(self, pos):
261
position = self.orig_pos - 1
215
assert (pos >= self.orig_pos-1 and pos <= self.orig_pos+self.orig_range)
216
position = self.orig_pos-1
263
218
for line in self.lines:
264
219
if isinstance(line, InsertLine):
278
def iter_hunks(iter_lines, allow_dirty=False):
280
:arg iter_lines: iterable of lines to parse for hunks
281
:kwarg allow_dirty: If True, when we encounter something that is not
282
a hunk header when we're looking for one, assume the rest of the lines
283
are not part of the patch (comments or other junk). Default False
233
def iter_hunks(iter_lines):
286
235
for line in iter_lines:
288
237
if hunk is not None:
292
241
if hunk is not None:
295
hunk = hunk_from_header(line)
296
except MalformedHunkHeader:
298
# If the line isn't a hunk header, then we've reached the end
299
# of this patch and there's "junk" at the end. Ignore the
300
# rest of this patch.
243
hunk = hunk_from_header(line)
305
246
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
306
hunk_line = parse_line(next(iter_lines))
247
hunk_line = parse_line(iter_lines.next())
307
248
hunk.lines.append(hunk_line)
308
249
if isinstance(hunk_line, (RemoveLine, ContextLine)):
316
class BinaryPatch(object):
318
258
def __init__(self, oldname, newname):
319
259
self.oldname = oldname
320
260
self.newname = newname
323
return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
326
class Patch(BinaryPatch):
328
def __init__(self, oldname, newname):
329
BinaryPatch.__init__(self, oldname, newname)
333
ret = self.get_header()
334
ret += b"".join([h.as_bytes() for h in self.hunks])
264
ret = self.get_header()
265
ret += "".join([str(h) for h in self.hunks])
337
268
def get_header(self):
338
return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)
269
return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
340
def stats_values(self):
341
"""Calculate the number of inserts and removes."""
272
"""Return a string of patch statistics"""
344
275
for hunk in self.hunks:
345
276
for line in hunk.lines:
346
277
if isinstance(line, InsertLine):
348
279
elif isinstance(line, RemoveLine):
350
return (inserts, removes, len(self.hunks))
353
"""Return a string of patch statistics"""
354
281
return "%i inserts, %i removes in %i hunks" % \
282
(inserts, removes, len(self.hunks))
357
284
def pos_in_mod(self, position):
358
285
newpos = position
382
def parse_patch(iter_lines, allow_dirty=False):
384
:arg iter_lines: iterable of lines to parse
385
:kwarg allow_dirty: If True, allow the patch to have trailing junk.
388
iter_lines = iter_lines_handle_nl(iter_lines)
390
(orig_name, mod_name) = get_patch_names(iter_lines)
391
except BinaryFiles as e:
392
return BinaryPatch(e.orig_name, e.mod_name)
394
patch = Patch(orig_name, mod_name)
395
for hunk in iter_hunks(iter_lines, allow_dirty):
396
patch.hunks.append(hunk)
400
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
402
:arg iter_lines: iterable of lines to parse for patches
403
:kwarg allow_dirty: If True, allow comments and other non-patch text
404
before the first patch. Note that the algorithm here can only find
405
such text before any patches have been found. Comments after the
406
first patch are stripped away in iter_hunks() if it is also passed
407
allow_dirty=True. Default False.
409
# FIXME: Docstring is not quite true. We allow certain comments no
410
# matter what, if they start with '===', '***', or '#'. Someone should
411
# reexamine this logic and decide if we should include those in
412
# allow_dirty or restrict those to only being before the patch is found
413
# (as allow_dirty does).
414
regex = re.compile(binary_files_re)
309
def parse_patch(iter_lines):
310
(orig_name, mod_name) = get_patch_names(iter_lines)
311
patch = Patch(orig_name, mod_name)
312
for hunk in iter_hunks(iter_lines):
313
patch.hunks.append(hunk)
317
def iter_file_patch(iter_lines):
420
319
for line in iter_lines:
421
if line.startswith(b'=== '):
320
if line.startswith('=== ') or line.startswith('*** '):
322
if line.startswith('#'):
324
elif line.startswith('--- '):
422
325
if len(saved_lines) > 0:
423
if keep_dirty and len(dirty_head) > 0:
424
yield {'saved_lines': saved_lines,
425
'dirty_head': dirty_head}
430
dirty_head.append(line)
432
if line.startswith(b'*** '):
434
if line.startswith(b'#'):
437
if line.startswith(b'-') or line.startswith(b' '):
439
elif line.startswith(b'--- ') or regex.match(line):
440
if allow_dirty and beginning:
441
# Patches can have "junk" at the beginning
442
# Stripping junk from the end of patches is handled when we
445
elif len(saved_lines) > 0:
446
if keep_dirty and len(dirty_head) > 0:
447
yield {'saved_lines': saved_lines,
448
'dirty_head': dirty_head}
453
elif line.startswith(b'@@'):
454
hunk = hunk_from_header(line)
455
orig_range = hunk.orig_range
456
328
saved_lines.append(line)
457
329
if len(saved_lines) > 0:
458
if keep_dirty and len(dirty_head) > 0:
459
yield {'saved_lines': saved_lines,
460
'dirty_head': dirty_head}
465
333
def iter_lines_handle_nl(iter_lines):
486
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
488
:arg iter_lines: iterable of lines to parse for patches
489
:kwarg allow_dirty: If True, allow text that's not part of the patch at
490
selected places. This includes comments before and after a patch
491
for instance. Default False.
492
:kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
495
for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
496
if 'dirty_head' in patch_lines:
497
yield ({'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
498
'dirty_head': patch_lines['dirty_head']})
500
yield parse_patch(patch_lines, allow_dirty)
353
def parse_patches(iter_lines):
    """Parse every per-file patch found in a stream of lines.

    The input is first wrapped with iter_lines_handle_nl(), then divided into
    per-file groups by iter_file_patch(); each group is handed to
    parse_patch() as an iterator.

    :param iter_lines: iterable of patch text lines
    :return: list of the parse_patch() results, in the order the groups
        were produced
    """
    normalized = iter_lines_handle_nl(iter_lines)
    parsed = []
    for file_lines in iter_file_patch(normalized):
        parsed.append(parse_patch(iter(file_lines)))
    return parsed
503
358
def difference_index(atext, btext):
523
378
"""Iterate through a series of lines with a patch applied.
524
379
This handles a single file, and does exact, not fuzzy patching.
526
patch_lines = iter_lines_handle_nl(iter(patch_lines))
381
if orig_lines is not None:
382
orig_lines = orig_lines.__iter__()
384
patch_lines = iter_lines_handle_nl(patch_lines.__iter__())
527
385
get_patch_names(patch_lines)
528
return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
531
def iter_patched_from_hunks(orig_lines, hunks):
532
"""Iterate through a series of lines with a patch applied.
533
This handles a single file, and does exact, not fuzzy patching.
535
:param orig_lines: The unpatched lines.
536
:param hunks: An iterable of Hunk instances.
540
if orig_lines is not None:
541
orig_lines = iter(orig_lines)
387
for hunk in iter_hunks(patch_lines):
543
388
while line_no < hunk.orig_pos:
544
orig_line = next(orig_lines)
389
orig_line = orig_lines.next()
547
392
for hunk_line in hunk.lines:
548
seen_patch.append(hunk_line.contents)
393
seen_patch.append(str(hunk_line))
549
394
if isinstance(hunk_line, InsertLine):
550
395
yield hunk_line.contents
551
396
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
552
orig_line = next(orig_lines)
397
orig_line = orig_lines.next()
553
398
if orig_line != hunk_line.contents:
554
raise PatchConflict(line_no, orig_line,
555
b''.join(seen_patch))
399
raise PatchConflict(line_no, orig_line, "".join(seen_patch))
556
400
if isinstance(hunk_line, ContextLine):
559
if not isinstance(hunk_line, RemoveLine):
560
raise AssertionError(hunk_line)
403
assert isinstance(hunk_line, RemoveLine)
562
405
if orig_lines is not None:
563
406
for line in orig_lines:
567
def apply_patches(tt, patches, prefix=1):
568
"""Apply patches to a TreeTransform.
570
:param tt: TreeTransform instance
571
:param patches: List of patches
572
:param prefix: Number of leading path segments to strip
575
return '/'.join(p.split('/')[1:])
577
from breezy.bzr.generate_ids import gen_file_id
578
# TODO(jelmer): Extract and set mode
579
for patch in patches:
580
if patch.oldname == b'/dev/null':
584
oldname = strip_prefix(patch.oldname.decode())
585
trans_id = tt.trans_id_tree_path(oldname)
586
orig_contents = tt._tree.get_file_text(oldname)
587
tt.delete_contents(trans_id)
589
if patch.newname != b'/dev/null':
590
newname = strip_prefix(patch.newname.decode())
591
new_contents = iter_patched_from_hunks(
592
orig_contents.splitlines(True), patch.hunks)
594
parts = os.path.split(newname)
596
for part in parts[1:-1]:
597
trans_id = tt.new_directory(part, trans_id)
599
parts[-1], trans_id, new_contents,
600
file_id=gen_file_id(newname))
602
tt.create_file(new_contents, trans_id)
605
class AppliedPatches(object):
606
"""Context that provides access to a tree with patches applied.
609
def __init__(self, tree, patches, prefix=1):
611
self.patches = patches
615
from .transform import TransformPreview
616
self._tt = TransformPreview(self.tree)
617
apply_patches(self._tt, self.patches, prefix=self.prefix)
618
return self._tt.get_preview_tree()
620
def __exit__(self, exc_type, exc_value, exc_tb):