1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
28
# Pattern for a "Binary files X and Y differ" line; the two groups capture
# the original and the modified file name (consumed by get_patch_names).
binary_files_re = b'Binary files (.*) and (.*) differ\n'
31
class PatchSyntax(BzrError):
    """Parent of all errors describing malformed patch text."""
35
class BinaryFiles(BzrError):
    """Signal that a "Binary files ... differ" section was met in a patch.

    :ivar orig_name: name of the original file, as passed to the constructor
    :ivar mod_name: name of the modified file, as passed to the constructor
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        self.orig_name, self.mod_name = orig_name, mod_name
44
class MalformedPatchHeader(PatchSyntax):
    """Error for a patch whose "--- "/"+++ " header lines are absent or bad.

    :param desc: short description of what is wrong with the header
    :param line: the offending line (rendered with ``%r`` in the message)
    """

    _fmt = "Malformed patch header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt names ``desc`` and ``line``; presumably BzrError fills the
        # format from instance attributes, as PatchConflict's fields suggest.
        self.desc = desc
        self.line = line
53
class MalformedLine(PatchSyntax):
    """Error for a hunk body line whose leading character is not recognised.

    :param desc: short description of the problem
    :param line: the offending line (rendered with ``%r`` in the message)
    """

    _fmt = "Malformed line. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt names ``desc`` and ``line``; presumably BzrError fills the
        # format from instance attributes, as PatchConflict's fields suggest.
        self.desc = desc
        self.line = line
62
class PatchConflict(BzrError):
    """Error raised when the original text does not match what a patch expects.

    :param line_no: line number at which the mismatch was found
    :param orig_line: the line found in the original text (bytes or str)
    :param patch_line: the text the patch expected there (bytes or str)
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        self.orig_line = self._strip_newlines(orig_line)
        self.patch_line = self._strip_newlines(patch_line)

    @staticmethod
    def _strip_newlines(text):
        """Remove trailing newlines from *text*, whether bytes or str."""
        # iter_patched_from_hunks passes bytes; bytes.rstrip('\n') raises
        # TypeError on Python 3, so pick a newline of the matching type.
        newline = b'\n' if isinstance(text, bytes) else '\n'
        return text.rstrip(newline)
73
class MalformedHunkHeader(PatchSyntax):
    """Error for an "@@ ... @@" hunk header line that cannot be parsed.

    :param desc: short description of the problem
    :param line: the offending line (rendered with ``%r`` in the message)
    """

    _fmt = "Malformed hunk header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt names ``desc`` and ``line``; presumably BzrError fills the
        # format from instance attributes, as PatchConflict's fields suggest.
        self.desc = desc
        self.line = line
82
def get_patch_names(iter_lines):
    """Read the two file-name header lines from the start of a patch.

    :param iter_lines: iterator over the patch's lines (bytes)
    :return: (orig_name, mod_name), taken from the "--- " and "+++ " lines
        with the four-byte prefix and trailing newlines removed
    :raises BinaryFiles: if the first line is a "Binary files ... differ" line
    :raises MalformedPatchHeader: if either header line is absent, or the
        first line does not start with "--- "
    :raises PatchSyntax: if the second line does not start with "+++ "
    """
    # Only next() can raise StopIteration, so guard exactly that call; an
    # exhausted iterator is then reported as a malformed header rather than
    # leaking StopIteration to the caller.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith(b"--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip(b"\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith(b"+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip(b"\n")
    return (orig_name, mod_name)
105
def parse_range(textrange):
106
"""Parse a patch range, handling the "1" special-case
108
:param textrange: The text to parse
110
:return: the position and range, as a tuple
113
tmp = textrange.split(b',')
124
def hunk_from_header(line):
    """Build a Hunk from an "@@ -orig +mod @@ tail" header line.

    :param line: the header line as bytes, including its trailing newline
    :return: a Hunk built from the parsed positions, ranges and tail
    :raises MalformedHunkHeader: if the line does not have that shape, a
        range cannot be parsed, or a parsed range is negative
    """
    matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)

    # The first group must hold exactly two space-separated range specs.
    try:
        orig, mod = matches.group(1).split(b" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)

    if not (orig.startswith(b'-') and mod.startswith(b'+')):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)

    try:
        orig_pos, orig_range = parse_range(orig[1:])
        mod_pos, mod_range = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)

    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)

    return Hunk(orig_pos, orig_range, mod_pos, mod_range, matches.group(3))
146
class HunkLine(object):
    """Base class for one line of a hunk body.

    :ivar contents: the content of the line, as bytes
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Return the line as patch text, prefixed with *leadchar*.

        When the contents lack a trailing newline, a newline followed by the
        NO_NL marker line is appended.

        :param leadchar: marker bytes to put in front of the contents
        :return: the serialised line as bytes
        """
        # The former special case for a bare b"\n" context line was disabled
        # with ``and False`` and could never run, so it is not kept.
        if self.contents.endswith(b'\n'):
            terminator = b''
        else:
            terminator = b'\n' + NO_NL
        return leadchar + self.contents + terminator

    def as_bytes(self):
        """Serialise the line; subclasses supply the marker character."""
        raise NotImplementedError
164
class ContextLine(HunkLine):
    """Hunk line serialised with a leading space."""

    def __init__(self, contents):
        super(ContextLine, self).__init__(contents)

    def as_bytes(self):
        """Return the line as patch text with a b" " marker."""
        return self.get_str(b" ")
173
class InsertLine(HunkLine):
    """Hunk line serialised with a leading "+"."""

    def __init__(self, contents):
        super(InsertLine, self).__init__(contents)

    def as_bytes(self):
        """Return the line as patch text with a b"+" marker."""
        return self.get_str(b"+")
181
class RemoveLine(HunkLine):
    """Hunk line serialised with a leading "-"."""

    def __init__(self, contents):
        super(RemoveLine, self).__init__(contents)

    def as_bytes(self):
        """Return the line as patch text with a b"-" marker."""
        return self.get_str(b"-")
189
# Marker line that HunkLine.get_str appends after a line whose contents do
# not end with a newline.
NO_NL = b'\\ No newline at end of file\n'
190
# NOTE(review): presumably a directive for the pychecker lint tool that turns
# off its return-value checks for this module -- confirm it is still needed.
__pychecker__ = "no-returnvalues"
193
def parse_line(line):
    """Turn one line of a hunk body into the matching HunkLine subclass.

    A line that starts with a newline is kept whole as a ContextLine; for
    every other recognised line the one-byte marker is dropped.

    :param line: the raw hunk line, as bytes
    :return: a ContextLine, InsertLine or RemoveLine
    :raises MalformedLine: if the line starts with no known marker
    """
    if line.startswith(b"\n"):
        return ContextLine(line)
    marker_types = (
        (b" ", ContextLine),
        (b"+", InsertLine),
        (b"-", RemoveLine),
    )
    for marker, line_type in marker_types:
        if line.startswith(marker):
            return line_type(line[1:])
    raise MalformedLine("Unknown line type", line)
211
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
212
self.orig_pos = orig_pos
213
self.orig_range = orig_range
214
self.mod_pos = mod_pos
215
self.mod_range = mod_range
219
def get_header(self):
220
if self.tail is None:
223
tail_str = b' ' + self.tail
224
return b"@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
226
self.range_str(self.mod_pos,
230
def range_str(self, pos, range):
231
"""Return a file range, special-casing for 1-line files.
233
:param pos: The position in the file
235
:param range: The range in the file
237
:return: a string in the format 1,4 except when range == pos == 1
242
return b"%i,%i" % (pos, range)
245
lines = [self.get_header()]
246
for line in self.lines:
247
lines.append(line.as_bytes())
248
return b"".join(lines)
252
def shift_to_mod(self, pos):
253
if pos < self.orig_pos - 1:
255
elif pos > self.orig_pos + self.orig_range:
256
return self.mod_range - self.orig_range
258
return self.shift_to_mod_lines(pos)
260
def shift_to_mod_lines(self, pos):
261
position = self.orig_pos - 1
263
for line in self.lines:
264
if isinstance(line, InsertLine):
266
elif isinstance(line, RemoveLine):
271
elif isinstance(line, ContextLine):
278
def iter_hunks(iter_lines, allow_dirty=False):
280
:arg iter_lines: iterable of lines to parse for hunks
281
:kwarg allow_dirty: If True, when we encounter something that is not
282
a hunk header when we're looking for one, assume the rest of the lines
283
are not part of the patch (comments or other junk). Default False
286
for line in iter_lines:
295
hunk = hunk_from_header(line)
296
except MalformedHunkHeader:
298
# If the line isn't a hunk header, then we've reached the end
299
# of this patch and there's "junk" at the end. Ignore the
300
# rest of this patch.
305
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
306
hunk_line = parse_line(next(iter_lines))
307
hunk.lines.append(hunk_line)
308
if isinstance(hunk_line, (RemoveLine, ContextLine)):
310
if isinstance(hunk_line, (InsertLine, ContextLine)):
316
class BinaryPatch(object):
    """Patch entry that only records the names of two differing files.

    :ivar oldname: name of the original file (bytes)
    :ivar newname: name of the modified file (bytes)
    """

    def __init__(self, oldname, newname):
        self.oldname, self.newname = oldname, newname

    def as_bytes(self):
        """Return the "Binary files ... differ" line for this patch."""
        names = (self.oldname, self.newname)
        return b'Binary files %s and %s differ\n' % names
326
class Patch(BinaryPatch):
328
def __init__(self, oldname, newname):
329
BinaryPatch.__init__(self, oldname, newname)
333
ret = self.get_header()
334
ret += b"".join([h.as_bytes() for h in self.hunks])
337
def get_header(self):
338
return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)
340
def stats_values(self):
341
"""Calculate the number of inserts and removes."""
344
for hunk in self.hunks:
345
for line in hunk.lines:
346
if isinstance(line, InsertLine):
348
elif isinstance(line, RemoveLine):
350
return (inserts, removes, len(self.hunks))
353
"""Return a string of patch statistics"""
354
return "%i inserts, %i removes in %i hunks" % \
357
def pos_in_mod(self, position):
359
for hunk in self.hunks:
360
shift = hunk.shift_to_mod(position)
366
def iter_inserted(self):
367
"""Iteraties through inserted lines
369
:return: Pair of line number, line
370
:rtype: iterator of (int, InsertLine)
372
for hunk in self.hunks:
373
pos = hunk.mod_pos - 1
374
for line in hunk.lines:
375
if isinstance(line, InsertLine):
378
if isinstance(line, ContextLine):
382
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single-file patch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
    :return: a BinaryPatch when the header is a "Binary files" line,
        otherwise a Patch holding the parsed hunks
    '''
    lines = iter_lines_handle_nl(iter_lines)
    try:
        orig_name, mod_name = get_patch_names(lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)

    patch = Patch(orig_name, mod_name)
    for parsed_hunk in iter_hunks(lines, allow_dirty):
        patch.hunks.append(parsed_hunk)
    return patch
400
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
402
:arg iter_lines: iterable of lines to parse for patches
403
:kwarg allow_dirty: If True, allow comments and other non-patch text
404
before the first patch. Note that the algorithm here can only find
405
such text before any patches have been found. Comments after the
406
first patch are stripped away in iter_hunks() if it is also passed
407
allow_dirty=True. Default False.
409
# FIXME: Docstring is not quite true. We allow certain comments no
410
# matter what, if they start with '===', '***', or '#'. Someone should
411
# reexamine this logic and decide if we should include those in
412
# allow_dirty or restrict those to only being before the patch is found
413
# (as allow_dirty does).
414
regex = re.compile(binary_files_re)
420
for line in iter_lines:
421
if line.startswith(b'=== '):
422
if len(saved_lines) > 0:
423
if keep_dirty and len(dirty_head) > 0:
424
yield {'saved_lines': saved_lines,
425
'dirty_head': dirty_head}
430
dirty_head.append(line)
432
if line.startswith(b'*** '):
434
if line.startswith(b'#'):
437
if line.startswith(b'-') or line.startswith(b' '):
439
elif line.startswith(b'--- ') or regex.match(line):
440
if allow_dirty and beginning:
441
# Patches can have "junk" at the beginning
442
# Stripping junk from the end of patches is handled when we
445
elif len(saved_lines) > 0:
446
if keep_dirty and len(dirty_head) > 0:
447
yield {'saved_lines': saved_lines,
448
'dirty_head': dirty_head}
453
elif line.startswith(b'@@'):
454
hunk = hunk_from_header(line)
455
orig_range = hunk.orig_range
456
saved_lines.append(line)
457
if len(saved_lines) > 0:
458
if keep_dirty and len(dirty_head) > 0:
459
yield {'saved_lines': saved_lines,
460
'dirty_head': dirty_head}
465
def iter_lines_handle_nl(iter_lines):
467
Iterates through lines, ensuring that lines that originally had no
468
terminating \n are produced without one. This transformation may be
469
applied at any point up until hunk line parsing, and is safe to apply
473
for line in iter_lines:
475
if not last_line.endswith(b'\n'):
476
raise AssertionError()
477
last_line = last_line[:-1]
479
if last_line is not None:
482
if last_line is not None:
486
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Yield one parsed patch for each file patch found in the lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places. This includes comments before and after a patch
        for instance. Default False.
    :kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
        Default False.
    '''
    for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
        if 'dirty_head' not in patch_lines:
            yield parse_patch(patch_lines, allow_dirty)
            continue
        # Entry carries its dirty header: parse only the saved lines and
        # hand the header back alongside the parsed patch.
        yield {'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
               'dirty_head': patch_lines['dirty_head']}
503
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the overlapping prefix (the length of the shorter text) is compared.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the shorter text, which is exactly the compared range.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
522
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: the unpatched lines
    :param patch_lines: the lines of the patch, header included
    :return: what iter_patched_from_hunks returns for the parsed hunks
    """
    remaining = iter_lines_handle_nl(iter(patch_lines))
    # Consume the two name lines; the names themselves are not needed here.
    get_patch_names(remaining)
    hunks = iter_hunks(remaining)
    return iter_patched_from_hunks(orig_lines, hunks)
531
def iter_patched_from_hunks(orig_lines, hunks):
532
"""Iterate through a series of lines with a patch applied.
533
This handles a single file, and does exact, not fuzzy patching.
535
:param orig_lines: The unpatched lines.
536
:param hunks: An iterable of Hunk instances.
540
if orig_lines is not None:
541
orig_lines = iter(orig_lines)
543
while line_no < hunk.orig_pos:
544
orig_line = next(orig_lines)
547
for hunk_line in hunk.lines:
548
seen_patch.append(hunk_line.contents)
549
if isinstance(hunk_line, InsertLine):
550
yield hunk_line.contents
551
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
552
orig_line = next(orig_lines)
553
if orig_line != hunk_line.contents:
554
raise PatchConflict(line_no, orig_line,
555
b''.join(seen_patch))
556
if isinstance(hunk_line, ContextLine):
559
if not isinstance(hunk_line, RemoveLine):
560
raise AssertionError(hunk_line)
562
if orig_lines is not None:
563
for line in orig_lines:
567
def apply_patches(tt, patches, prefix=1):
568
"""Apply patches to a TreeTransform.
570
:param tt: TreeTransform instance
571
:param patches: List of patches
572
:param prefix: Number of leading path segments to strip
575
return '/'.join(p.split('/')[1:])
577
from breezy.bzr.generate_ids import gen_file_id
578
# TODO(jelmer): Extract and set mode
579
for patch in patches:
580
if patch.oldname == b'/dev/null':
584
oldname = strip_prefix(patch.oldname.decode())
585
trans_id = tt.trans_id_tree_path(oldname)
586
orig_contents = tt._tree.get_file_text(oldname)
587
tt.delete_contents(trans_id)
589
if patch.newname != b'/dev/null':
590
newname = strip_prefix(patch.newname.decode())
591
new_contents = iter_patched_from_hunks(
592
orig_contents.splitlines(True), patch.hunks)
594
parts = os.path.split(newname)
596
for part in parts[1:-1]:
597
trans_id = tt.new_directory(part, trans_id)
599
parts[-1], trans_id, new_contents,
600
file_id=gen_file_id(newname))
602
tt.create_file(new_contents, trans_id)
605
class AppliedPatches(object):
606
"""Context that provides access to a tree with patches applied.
609
def __init__(self, tree, patches, prefix=1):
611
self.patches = patches
615
from .transform import TransformPreview
616
self._tt = TransformPreview(self.tree)
617
apply_patches(self._tt, self.patches, prefix=self.prefix)
618
return self._tt.get_preview_tree()
620
def __exit__(self, exc_type, exc_value, exc_tb):