1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
# Regular expression (bytes) for the line that replaces the hunks of a patch
# when the compared files are binary. Groups 1 and 2 capture the two file
# names, in the order they appear on the line.
binary_files_re = b'Binary files (.*) and (.*) differ\n'
29
class PatchSyntax(BzrError):
    """Base class for patch syntax errors."""
33
class BinaryFiles(BzrError):
    """Raised when a "Binary files ... differ" line is found instead of a header.

    :ivar orig_name: Name of the first file on the line.
    :ivar mod_name: Name of the second file on the line.
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        self.orig_name = orig_name
        self.mod_name = mod_name
42
class MalformedPatchHeader(PatchSyntax):
    """Raised when the ``---``/``+++`` header of a patch cannot be parsed.

    :ivar desc: Short description of what is wrong.
    :ivar line: The offending line (shown with ``%r`` in the message).
    """

    _fmt = "Malformed patch header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Both attributes are interpolated into _fmt.
        self.desc = desc
        self.line = line
51
class MalformedLine(PatchSyntax):
    """Raised when a line inside a hunk has an unrecognised form.

    :ivar desc: Short description of what is wrong.
    :ivar line: The offending line (shown with ``%r`` in the message).
    """

    _fmt = "Malformed line. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Both attributes are interpolated into _fmt.
        self.desc = desc
        self.line = line
60
class PatchConflict(BzrError):
    """Raised when the text being patched differs from what the patch expects.

    :ivar line_no: Line number reported in the message.
    :ivar orig_line: Text found in the original, trailing newlines removed.
    :ivar patch_line: Text the patch expected, trailing newlines removed.
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        self.orig_line = self._strip_newlines(orig_line)
        self.patch_line = self._strip_newlines(patch_line)

    @staticmethod
    def _strip_newlines(text):
        """Remove trailing newlines from either a bytes or a text string."""
        # The patch machinery in this module hands over bytes; calling
        # rstrip('\n') on bytes raises TypeError, which would mask the
        # conflict being reported.
        if isinstance(text, bytes):
            return text.rstrip(b'\n')
        return text.rstrip('\n')
71
class MalformedHunkHeader(PatchSyntax):
    """Raised when an ``@@ ... @@`` hunk header cannot be parsed.

    :ivar desc: Short description of what is wrong.
    :ivar line: The offending line (shown with ``%r`` in the message).
    """

    _fmt = "Malformed hunk header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Both attributes are interpolated into _fmt.
        self.desc = desc
        self.line = line
80
def get_patch_names(iter_lines):
    """Read the two header lines of a patch and return the file names.

    :param iter_lines: An iterator of bytes lines positioned at the header.
        Two lines are consumed on success.
    :return: ``(orig_name, mod_name)`` taken from the ``---`` and ``+++``
        lines, with the four-character prefix and trailing newline removed.
    :raises BinaryFiles: if the first line is a "Binary files ... differ"
        line.
    :raises MalformedPatchHeader: if either header line is missing or does
        not start with the expected prefix.
    """
    # Every next() is guarded: an exhausted iterator must surface as a
    # header error, not as a StopIteration leaking into calling generators.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith(b"--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip(b"\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith(b"+++ "):
        # MalformedPatchHeader is a PatchSyntax, so handlers for the base
        # class keep working while the offending line is now reported.
        raise MalformedPatchHeader("No mod name", line)
    mod_name = line[4:].rstrip(b"\n")
    return (orig_name, mod_name)
103
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A range is written either as ``pos,length`` or, when the length is one,
    as just ``pos``.

    :param textrange: The text to parse (bytes)
    :return: the position and range, as a tuple of two ints
    :raises ValueError: if there are more than two comma-separated fields or
        a field is not an integer
    """
    tmp = textrange.split(b',')
    if len(tmp) == 1:
        # A bare position implies a length of one line.
        pos, length = tmp[0], b"1"
    else:
        # Raises ValueError when there are too many fields.
        pos, length = tmp
    return (int(pos), int(length))
122
def hunk_from_header(line):
    """Build an empty Hunk from an ``@@ -a,b +c,d @@ tail`` header line.

    :param line: The header line (bytes), including its trailing newline.
    :return: A Hunk carrying the parsed positions, ranges and optional tail;
        its lines are filled in by the caller.
    :raises MalformedHunkHeader: if the line does not match the header
        format, the positions lack their ``-``/``+`` prefixes, a number
        cannot be parsed, or a range is negative.
    """
    import re
    matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(b" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith(b'-') or not mod.startswith(b'+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        # Drop the leading '-' / '+' before parsing the numbers.
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the optional text after the closing '@@'; None if absent.
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
144
class HunkLine(object):
    """Base class for one line of a hunk.

    :ivar contents: The line's text (bytes) without its leading marker.
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Serialise the line with ``leadchar`` as its marker.

        :param leadchar: The one-byte marker to put in front of the contents.
        :return: The marker plus contents. If the contents lack a trailing
            newline, one is added followed by the NO_NL marker line.
        """
        # The original began with a branch guarded by "... and False", which
        # could never run; it has been removed.
        if self.contents.endswith(b'\n'):
            terminator = b''
        else:
            terminator = b'\n' + NO_NL
        return leadchar + self.contents + terminator

    def as_bytes(self):
        """Serialise the line; subclasses supply the marker."""
        raise NotImplementedError
162
class ContextLine(HunkLine):
    """A hunk line present in both versions; serialised with a space marker."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line prefixed with a space."""
        return self.get_str(b" ")
171
class InsertLine(HunkLine):
    """A hunk line added by the patch; serialised with a ``+`` marker."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line prefixed with ``+``."""
        return self.get_str(b"+")
179
class RemoveLine(HunkLine):
    """A hunk line deleted by the patch; serialised with a ``-`` marker."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line prefixed with ``-``."""
        return self.get_str(b"-")
187
# Marker line that follows a patch line whose text has no trailing newline.
# HunkLine.get_str() appends it when serialising such a line, and
# iter_lines_handle_nl() consumes it when reading.
NO_NL = b'\\ No newline at end of file\n'
188
# NOTE(review): presumably an option string for the PyChecker linter that
# silences return-value warnings for the code that follows -- confirm.
__pychecker__ = "no-returnvalues"
191
def parse_line(line):
    """Turn one raw hunk line into the matching HunkLine subclass.

    :param line: A bytes line from the body of a hunk.
    :return: A ContextLine, InsertLine or RemoveLine. The one-byte marker is
        removed from the contents, except for a line that starts with a
        newline, which is kept whole as a context line.
    :raises MalformedLine: if the line starts with none of the known markers.
    """
    if line.startswith(b"\n"):
        # No marker at all: treated as context with the line kept as-is.
        return ContextLine(line)
    if line.startswith(b" "):
        return ContextLine(line[1:])
    if line.startswith(b"+"):
        return InsertLine(line[1:])
    if line.startswith(b"-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)
209
class Hunk(object):
    """One ``@@`` section of a patch: a header plus its lines.

    :ivar orig_pos: Start position of the hunk in the original text.
    :ivar orig_range: Number of original lines the hunk covers.
    :ivar mod_pos: Start position of the hunk in the modified text.
    :ivar mod_range: Number of modified lines the hunk covers.
    :ivar tail: Optional bytes shown after the closing ``@@``, or None.
    :ivar lines: List of HunkLine objects, appended to by the parser.
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the ``@@ -a,b +c,d @@[ tail]`` header line as bytes."""
        if self.tail is None:
            tail_str = b''
        else:
            tail_str = b' ' + self.tail
        return b"@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing for 1-line ranges.

        :param pos: The position in the file
        :param range: The range in the file
        :return: bytes in the format ``1,4``, or just the position when the
            range is 1
        """
        if range == 1:
            return b"%i" % pos
        return b"%i,%i" % (pos, range)

    def as_bytes(self):
        """Serialise the header followed by every line of the hunk."""
        lines = [self.get_header()]
        lines.extend(line.as_bytes() for line in self.lines)
        return b"".join(lines)

    # Lets bytes(hunk) produce the same serialisation.
    __bytes__ = as_bytes

    def shift_to_mod(self, pos):
        """Return how far this hunk moves original position ``pos``.

        :param pos: A position in the original text.
        :return: 0 if ``pos`` lies before the hunk, the net size change if it
            lies after, otherwise the result of shift_to_mod_lines().
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk's lines to find the shift for a position inside it.

        :param pos: A position in the original text.
        :return: The accumulated shift (insertions minus removals seen up to
            ``pos``), or None if the line at ``pos`` is removed by the hunk.
        """
        position = self.orig_pos - 1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    # The requested line no longer exists after patching.
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
276
def iter_hunks(iter_lines, allow_dirty=False):
    '''Parse lines into Hunk objects, yielding each one when complete.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk). Default False
    :raises MalformedHunkHeader: if a header is invalid and allow_dirty is
        False.
    :raises MalformedLine: if a hunk line has an unknown marker, or the input
        ends before a hunk's declared ranges are filled.
    '''
    # The for loop and the inner next() must share one iterator.
    iter_lines = iter(iter_lines)
    hunk = None
    for line in iter_lines:
        if line == b"\n":
            # A blank line between hunks flushes the pending one.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end. Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Inside a generator a stray StopIteration is turned into
                # RuntimeError; report the truncated hunk as a syntax error.
                raise MalformedLine(
                    "Patch ends before hunk range is complete", b"")
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            # Context lines count towards both sides.
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
314
class BinaryPatch(object):
    """A patch entry that only records that two binary files differ.

    :ivar oldname: Name of the original file (bytes).
    :ivar newname: Name of the modified file (bytes).
    """

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def as_bytes(self):
        """Return the "Binary files ... differ" line for this entry."""
        return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
324
class Patch(BinaryPatch):
    """A textual patch for one file: two names plus a list of hunks.

    :ivar hunks: List of Hunk objects, in the order they were parsed.
    """

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def as_bytes(self):
        """Serialise the header followed by every hunk."""
        ret = self.get_header()
        ret += b"".join([h.as_bytes() for h in self.hunks])
        return ret

    def get_header(self):
        """Return the ``---``/``+++`` header lines as bytes."""
        return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: ``(inserts, removes, number of hunks)``
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % \
            self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        :param position: A position in the original text.
        :return: The shifted position, or None if any hunk reports that the
            line at ``position`` is removed.
        """
        newpos = position
        for hunk in self.hunks:
            # Each hunk's shift is computed from the original position.
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
380
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single-file patch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: A Patch with its hunks filled in, or a BinaryPatch when the
        header is a "Binary files ... differ" line.
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    else:
        patch = Patch(orig_name, mod_name)
        for hunk in iter_hunks(iter_lines, allow_dirty):
            patch.hunks.append(hunk)
        return patch
398
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Split a stream of lines into one chunk of lines per file patch.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch. Note that the algorithm here can only find
        such text before any patches have been found. Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True. Default False.
    :kwarg keep_dirty: If True, a chunk that was preceded by ``=== `` lines
        is yielded as a dict with keys 'saved_lines' and 'dirty_head'
        instead of as a plain list. Default False.
    :return: iterator of lists of lines (or dicts, see keep_dirty)
    '''
    # FIXME: Docstring is not quite true. We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    dirty_head = []
    # Original-side lines still expected in the current hunk; while it is
    # positive, a removed line starting with '--- ' is not taken as a header.
    orig_range = 0
    beginning = True

    for line in iter_lines:
        if line.startswith(b'=== '):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                pass
            elif len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
                saved_lines = []
            dirty_head.append(line)
            continue
        if line.startswith(b'*** '):
            continue
        if line.startswith(b'#'):
            continue
        elif orig_range > 0:
            if line.startswith((b'-', b' ')):
                orig_range -= 1
        elif line.startswith(b'--- ') or regex.match(line):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
            # A header always starts a fresh chunk; leading junk is dropped.
            saved_lines = []
        elif line.startswith(b'@@'):
            hunk = hunk_from_header(line)
            orig_range = hunk.orig_range
        saved_lines.append(line)
    if len(saved_lines) > 0:
        if keep_dirty and len(dirty_head) > 0:
            yield {'saved_lines': saved_lines,
                   'dirty_head': dirty_head}
        else:
            yield saved_lines
468
def iter_lines_handle_nl(iter_lines):
    """Fold "no newline at end of file" markers into the preceding line.

    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one. This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: iterable of bytes lines
    :return: iterator of lines with each NO_NL marker removed and the line
        before it stripped of its final newline
    :raises AssertionError: if a NO_NL marker has no preceding line, or that
        line does not end with a newline
    """
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            # A marker with nothing before it used to fail with
            # AttributeError on None; report it like other bad markers.
            if last_line is None or not last_line.endswith(b'\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            # The marker itself is never emitted.
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
489
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Parse a stream of lines holding any number of file patches.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places. This includes comments before and after a patch
        for instance. Default False.
    :kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
        Default False.
    :return: iterator yielding, per file, either the parsed patch or a dict
        with keys 'patch' and 'dirty_head'
    '''
    for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
        # iter_file_patch yields a dict only when it kept a dirty head;
        # otherwise it yields a list of lines, where this test is False.
        if 'dirty_head' in patch_lines:
            yield ({'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
                    'dirty_head': patch_lines['dirty_head']})
        else:
            yield parse_patch(patch_lines, allow_dirty)
506
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the common prefix length is compared; a text that is merely longer
    than the other does not count as a difference.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the shorter text, which is the range being compared.
    for i, (a, b) in enumerate(zip(atext, btext)):
        if a != b:
            return i
    return None
525
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: The lines of the patch, header included.
    :return: iterator of the patched lines
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # Consume the header; the names it carries are not needed here.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
534
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param hunks: An iterable of Hunk instances.
    :return: iterator of the patched lines
    :raises PatchConflict: if a context or removed line of a hunk differs
        from the corresponding original line
    """
    # Contents of every hunk line seen so far, reported on conflict.
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy through the untouched lines that precede the hunk.
        while line_no < hunk.orig_pos:
            orig_line = next(orig_lines)
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(hunk_line.contents)
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                orig_line = next(orig_lines)
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line,
                                        b''.join(seen_patch))
                # Context lines are kept; removed lines are just consumed.
                # (The old assertion here could never fire, because this
                # branch is only entered for ContextLine or RemoveLine.)
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    if orig_lines is not None:
        for line in orig_lines:
            yield line
570
def apply_patches(tt, patches, prefix=1):
    """Apply patches to a TreeTransform.

    :param tt: TreeTransform instance
    :param patches: List of patches
    :param prefix: Number leading path segments to strip
    """
    def strip_prefix(p):
        # Honour the prefix argument; it used to be ignored and exactly one
        # segment was always stripped (still the result for the default).
        return '/'.join(p.split('/')[prefix:])

    from breezy.bzr.generate_ids import gen_file_id
    # TODO(jelmer): Extract and set mode
    for patch in patches:
        if patch.oldname == b'/dev/null':
            # The patch creates the file: nothing to read or delete.
            trans_id = None
            orig_contents = b''
        else:
            oldname = strip_prefix(patch.oldname.decode())
            trans_id = tt.trans_id_tree_path(oldname)
            orig_contents = tt._tree.get_file_text(oldname)
            tt.delete_contents(trans_id)

        if patch.newname != b'/dev/null':
            newname = strip_prefix(patch.newname.decode())
            new_contents = iter_patched_from_hunks(
                orig_contents.splitlines(True), patch.hunks)
            if trans_id is None:
                # NOTE(review): os.path.split() returns a (head, tail) pair,
                # so parts[1:-1] is always empty and no intermediate
                # directory is created here -- confirm this is intended.
                parts = os.path.split(newname)
                trans_id = tt.root
                for part in parts[1:-1]:
                    trans_id = tt.new_directory(part, trans_id)
                tt.new_file(
                    parts[-1], trans_id, new_contents,
                    file_id=gen_file_id(newname))
            else:
                tt.create_file(new_contents, trans_id)
608
class AppliedPatches(object):
609
"""Context that provides access to a tree with patches applied.
612
def __init__(self, tree, patches, prefix=1):
614
self.patches = patches
618
self._tt = self.tree.preview_transform()
619
apply_patches(self._tt, self.patches, prefix=self.prefix)
620
return self._tt.get_preview_tree()
622
def __exit__(self, exc_type, exc_value, exc_tb):