1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
27
# Regular expression source (bytes) for the line that stands in for a patch
# body when the compared files are binary; the two groups capture the
# original and the modified file name.
binary_files_re = b'Binary files (.*) and (.*) differ\n'
30
class PatchSyntax(BzrError):
    """Base class for patch syntax errors."""
34
class BinaryFiles(BzrError):
    """Raised when a "Binary files ... differ" line replaces a patch header.

    :ivar orig_name: name of the original file, as given to the constructor
    :ivar mod_name: name of the modified file, as given to the constructor
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        self.orig_name = orig_name
        self.mod_name = mod_name
43
class MalformedPatchHeader(PatchSyntax):
    """Raised when the "---" / "+++" header of a patch cannot be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    _fmt = "Malformed patch header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt interpolates both names, so both must be stored on the
        # instance.
        self.desc = desc
        self.line = line
52
class MalformedLine(PatchSyntax):
    """Raised when a line inside a hunk has no recognised leading character.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    _fmt = "Malformed line. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt interpolates both names, so both must be stored on the
        # instance.
        self.desc = desc
        self.line = line
61
class PatchConflict(BzrError):
    """Raised when the text being patched differs from what the patch expects.

    :ivar line_no: line number supplied by the caller
    :ivar orig_line: the line found in the original text, without trailing
        newlines
    :ivar patch_line: the text the patch expected, without trailing newlines
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        self.orig_line = self._strip_newlines(orig_line)
        self.patch_line = self._strip_newlines(patch_line)

    @staticmethod
    def _strip_newlines(text):
        """Return *text* (bytes or text string) without trailing newlines."""
        # On Python 3 bytes.rstrip() rejects a str argument (and vice
        # versa), so the newline must have the same type as the value.
        newline = b'\n' if isinstance(text, bytes) else u'\n'
        return text.rstrip(newline)
72
class MalformedHunkHeader(PatchSyntax):
    """Raised when an "@@ ... @@" hunk header cannot be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    _fmt = "Malformed hunk header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # _fmt interpolates both names, so both must be stored on the
        # instance.
        self.desc = desc
        self.line = line
81
def get_patch_names(iter_lines):
    """Read the two header lines of a patch and return the file names.

    :param iter_lines: iterator over the patch lines (bytes); exactly two
        lines are consumed on success
    :return: tuple (orig_name, mod_name), both bytes without the trailing
        newline
    :raises BinaryFiles: if the first line is a "Binary files ... differ" line
    :raises MalformedPatchHeader: if a header line is missing or does not
        start with "--- " / "+++ "
    """
    # next() has to be inside the try: it is the only call that can raise
    # StopIteration, which is what "No orig line" reports.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith(b"--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip(b"\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith(b"+++ "):
        # MalformedPatchHeader is a PatchSyntax, so callers catching
        # PatchSyntax are unaffected; this mirrors the "No orig name" case.
        raise MalformedPatchHeader("No mod name", line)
    mod_name = line[4:].rstrip(b"\n")
    return (orig_name, mod_name)
104
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A range is written either as ``pos,range`` or, when the range is 1, as a
    bare ``pos``.

    :param textrange: The text to parse (bytes)
    :return: the position and range, as a tuple of two ints
    :raises ValueError: if either part is not an integer
    """
    pos, comma, length = textrange.partition(b',')
    if not comma:
        # A bare position means a one-line range.
        length = b"1"
    return (int(pos), int(length))
123
def hunk_from_header(line):
    """Build a Hunk (with no body lines yet) from an "@@ ... @@" header line.

    :param line: the header line (bytes), including its trailing newline
    :return: a Hunk carrying the parsed positions, ranges and optional tail
    :raises MalformedHunkHeader: if the line does not have the expected
        format, the positions lack their "-" / "+" prefix, a number cannot be
        parsed, or a range is negative
    """
    import re
    matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(b" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith(b'-') or not mod.startswith(b'+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        # Drop the leading "-" / "+" before parsing the numbers.
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the text after the closing "@@", or None when absent.
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
145
class HunkLine(object):
    """Base class for one body line of a hunk.

    :ivar contents: the line text (bytes) without its leading patch character
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Return the line as patch text.

        :param leadchar: the one-byte prefix (b" ", b"+" or b"-")
        :return: leadchar + contents; when contents does not end in a
            newline, a newline and the NO_NL marker line are appended
        """
        if self.contents.endswith(b'\n'):
            terminator = b''
        else:
            terminator = b'\n' + NO_NL
        return leadchar + self.contents + terminator

    def as_bytes(self):
        """Return the line as patch text; subclasses must override."""
        raise NotImplementedError
163
class ContextLine(HunkLine):
    """A hunk line written with a leading space."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line as patch text, prefixed with b" "."""
        return self.get_str(b" ")
172
class InsertLine(HunkLine):
    """A hunk line written with a leading "+"."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line as patch text, prefixed with b"+"."""
        return self.get_str(b"+")
180
class RemoveLine(HunkLine):
    """A hunk line written with a leading "-"."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the line as patch text, prefixed with b"-"."""
        return self.get_str(b"-")
188
# Marker line appended after a hunk line whose contents do not end in a
# newline (see HunkLine.get_str).
NO_NL = b'\\ No newline at end of file\n'
189
# NOTE(review): presumably a PyChecker directive silencing its return-value
# warnings for the code that follows -- confirm against PyChecker's docs.
__pychecker__ = "no-returnvalues"
192
def parse_line(line):
    """Turn one raw hunk body line into the matching HunkLine object.

    :param line: the raw line (bytes) including its leading patch character
    :return: a ContextLine, InsertLine or RemoveLine; the leading character
        is stripped from the contents, except that a line starting with a
        newline is kept whole as a ContextLine
    :raises MalformedLine: if the line starts with anything else
    """
    if line.startswith(b"\n"):
        # No leading space on this line; keep the contents untouched.
        return ContextLine(line)
    if line.startswith(b" "):
        return ContextLine(line[1:])
    if line.startswith(b"+"):
        return InsertLine(line[1:])
    if line.startswith(b"-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)
210
class Hunk(object):
    """One hunk of a patch: header information plus body lines.

    :ivar orig_pos: start position in the original text
    :ivar orig_range: number of original lines covered
    :ivar mod_pos: start position in the modified text
    :ivar mod_range: number of modified lines covered
    :ivar tail: bytes following the closing "@@" of the header, or None
    :ivar lines: list of HunkLine objects making up the body
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the "@@ -orig +mod @@[ tail]" header line as bytes."""
        if self.tail is None:
            tail_str = b''
        else:
            tail_str = b' ' + self.tail
        return b"@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing for 1-line ranges.

        :param pos: The position in the file
        :param range: The range in the file
        :return: bytes in the format 1,4, or just the position when
            range == 1
        """
        if range == 1:
            return b"%i" % pos
        return b"%i,%i" % (pos, range)

    def as_bytes(self):
        """Return the whole hunk (header and body lines) as bytes."""
        lines = [self.get_header()]
        for line in self.lines:
            lines.append(line.as_bytes())
        return b"".join(lines)

    def shift_to_mod(self, pos):
        """Return how far this hunk shifts original position *pos*.

        :return: 0 when pos lies before the hunk, the hunk's net size change
            when pos lies after it, otherwise the result of
            shift_to_mod_lines()
        """
        if pos < self.orig_pos - 1:
            # NOTE(review): value for positions before the hunk was not
            # visible in the reviewed source; 0 (no shift) is assumed.
            return 0
        elif pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        else:
            return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Return the shift for a position that falls inside this hunk.

        NOTE(review): the per-line bookkeeping below was reconstructed; only
        the loop skeleton was visible in the reviewed source. Confirm
        against the project's history.

        :return: the accumulated shift, or None when *pos* is itself a
            removed line
        """
        position = self.orig_pos - 1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
277
def iter_hunks(iter_lines, allow_dirty=False):
    '''Parse hunks from the lines that follow a patch header.

    :arg iter_lines: iterator of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk). Default False
    :return: generator of Hunk objects with their body lines filled in
    :raises MalformedHunkHeader: on a bad header when allow_dirty is False
    :raises MalformedLine: on an unrecognised body line, or when the input
        ends before a hunk body is complete
    '''
    hunk = None
    for line in iter_lines:
        if line == b"\n":
            # A blank line ends the current hunk, if any.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end. Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # A StopIteration escaping a generator becomes RuntimeError
                # (PEP 479); report the truncated hunk as a syntax error.
                raise MalformedLine("Unexpected end of patch in hunk", b"")
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            # Context lines count towards both sides of the hunk.
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
315
class BinaryPatch(object):
    """A patch entry that only records that two binary files differ.

    :ivar oldname: name of the original file (bytes)
    :ivar newname: name of the modified file (bytes)
    """

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def as_bytes(self):
        """Return the "Binary files ... differ" line for this entry."""
        return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
325
class Patch(BinaryPatch):
    """A textual patch for one file: two names plus a list of hunks.

    :ivar hunks: list of Hunk objects, in patch order
    """

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def as_bytes(self):
        """Return the header followed by every hunk, as bytes."""
        ret = self.get_header()
        ret += b"".join([h.as_bytes() for h in self.hunks])
        return ret

    def get_header(self):
        """Return the "--- old" / "+++ new" header lines as bytes."""
        return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: tuple (inserts, removes, number of hunks)
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % \
            self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        NOTE(review): the accumulation and None handling were reconstructed;
        only the loop and the shift_to_mod() call were visible in the
        reviewed source.

        :return: the shifted position, or None if a hunk reports no
            counterpart for it
        """
        newpos = position
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
381
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single file's patch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch when the lines open with a "Binary files" line,
        otherwise a Patch with its hunks filled in
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
399
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Split a stream of patch lines into one chunk per file.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch. Note that the algorithm here can only find
        such text before any patches have been found. Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True. Default False.
    :kwarg keep_dirty: If True, a chunk that was preceded by '=== ' lines is
        yielded as a dict with keys 'saved_lines' and 'dirty_head' instead of
        a plain list. Default False.
    :return: generator of lists of lines (or dicts, see keep_dirty)
    '''
    # FIXME: Docstring is not quite true. We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    #
    # NOTE(review): the state initialisation, the `continue` statements, the
    # orig_range countdown and the plain-list yields were reconstructed; they
    # were not visible in the reviewed source. Confirm against history.
    regex = re.compile(binary_files_re)
    saved_lines = []
    dirty_head = []
    # Number of original-side lines still expected in the current hunk; while
    # it is positive a line starting with "---" is hunk content, not a header.
    orig_range = 0
    beginning = True

    for line in iter_lines:
        if line.startswith(b'=== '):
            if len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
                saved_lines = []
            dirty_head.append(line)
            continue
        if line.startswith((b'*** ', b'#')):
            continue
        if orig_range > 0:
            if line.startswith((b'-', b' ')):
                orig_range -= 1
        elif line.startswith(b'--- ') or regex.match(line):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
            saved_lines = []
        elif line.startswith(b'@@'):
            hunk = hunk_from_header(line)
            orig_range = hunk.orig_range
        saved_lines.append(line)
    if len(saved_lines) > 0:
        if keep_dirty and len(dirty_head) > 0:
            yield {'saved_lines': saved_lines,
                   'dirty_head': dirty_head}
        else:
            yield saved_lines
464
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one. This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: iterable of patch lines (bytes)
    :return: generator of lines; each NO_NL marker line is dropped and the
        line before it loses its trailing newline
    :raises AssertionError: if a NO_NL marker has no preceding line, or the
        preceding line does not end in a newline
    """
    # Each line is held back one step so that a following marker can still
    # change it before it is yielded.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if last_line is None or not last_line.endswith(b'\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
485
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Parse every file patch found in a stream of lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places. This includes comments before and after a patch
        for instance. Default False.
    :kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
        Default False.
    :return: generator of parsed patches; a chunk that arrives with a dirty
        head is yielded as {'patch': ..., 'dirty_head': ...}
    '''
    for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
        # Chunks are either a plain list of lines or a dict carrying
        # 'dirty_head'. Checking the type avoids comparing a str key against
        # the bytes lines of a list.
        if isinstance(patch_lines, dict) and 'dirty_head' in patch_lines:
            yield ({'patch': parse_patch(patch_lines['saved_lines'],
                                         allow_dirty),
                    'dirty_head': patch_lines['dirty_head']})
        else:
            yield parse_patch(patch_lines, allow_dirty)
502
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the range covered by the shorter text is compared.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the end of the shorter text, which is the compared range.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
521
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: iterable of the patch's lines, header included
    :return: iterator over the patched lines
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # Consume the two header lines; the names themselves are not needed.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
530
# Generator that applies already-parsed hunks to orig_lines: inserted lines
# are yielded from the hunk, while context/removed lines are read from
# orig_lines and compared with the hunk contents, raising PatchConflict on a
# mismatch.
# NOTE(review): this definition continues past the end of the reviewed text
# and several of its statements are not visible, so the code is left as is.
# NOTE(review): seen_patch collects str(hunk_line) values, but they are later
# combined with b"".join(...); on Python 3 that join raises TypeError instead
# of the intended PatchConflict. Collecting hunk_line.as_bytes() looks like
# the intended behaviour -- confirm and fix together with the rest of the
# function.
# NOTE(review): next(orig_lines) is called inside a generator; if orig_lines
# runs out, the StopIteration surfaces as RuntimeError (PEP 479).
def iter_patched_from_hunks(orig_lines, hunks):
531
"""Iterate through a series of lines with a patch applied.
532
This handles a single file, and does exact, not fuzzy patching.
534
:param orig_lines: The unpatched lines.
535
:param hunks: An iterable of Hunk instances.
539
if orig_lines is not None:
540
orig_lines = iter(orig_lines)
542
while line_no < hunk.orig_pos:
543
orig_line = next(orig_lines)
546
for hunk_line in hunk.lines:
547
seen_patch.append(str(hunk_line))
548
if isinstance(hunk_line, InsertLine):
549
yield hunk_line.contents
550
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
551
orig_line = next(orig_lines)
552
if orig_line != hunk_line.contents:
553
raise PatchConflict(line_no, orig_line,
554
b"".join(seen_patch))
555
if isinstance(hunk_line, ContextLine):
558
if not isinstance(hunk_line, RemoveLine):
559
raise AssertionError(hunk_line)
561
if orig_lines is not None:
562
for line in orig_lines: