1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
28
# Byte pattern for the one-line "Binary files A and B differ" marker that can
# appear where a patch's "--- " header line is expected; the two groups
# capture the original and the modified file name.
binary_files_re = b'Binary files (.*) and (.*) differ\n'
31
class PatchSyntax(BzrError):
    """Base class for patch syntax errors.

    The malformed-header, malformed-line and malformed-hunk-header errors
    defined in this module derive from it, and it is also raised directly
    with a plain message.
    """
35
class BinaryFiles(BzrError):
    """Signal that a "Binary files ... differ" marker was found in a patch.

    :ivar orig_name: name of the original file, as given to the constructor
    :ivar mod_name: name of the modified file, as given to the constructor
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        # Both names are stored so that a handler can read them back from
        # the caught exception.
        self.orig_name, self.mod_name = orig_name, mod_name
44
class MalformedPatchHeader(PatchSyntax):
46
_fmt = "Malformed patch header. %(desc)s\n%(line)r"
48
def __init__(self, desc, line):
53
class MalformedLine(PatchSyntax):
55
_fmt = "Malformed line. %(desc)s\n%(line)r"
57
def __init__(self, desc, line):
62
class PatchConflict(BzrError):
    """Raised when the text being patched differs from what a patch expects.

    :ivar line_no: line number at which the mismatch was detected
    :ivar orig_line: the line found in the original text, with trailing
        newlines removed
    :ivar patch_line: the text the patch expected, with trailing newlines
        removed
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        self.orig_line = self._strip_newlines(orig_line)
        self.patch_line = self._strip_newlines(patch_line)

    @staticmethod
    def _strip_newlines(text):
        """Return text without its trailing newlines (bytes or str)."""
        # The patching code in this module hands over bytes, and on
        # Python 3 bytes.rstrip() raises TypeError when given a str
        # argument, so use the newline literal matching the text's type.
        newline = b'\n' if isinstance(text, bytes) else '\n'
        return text.rstrip(newline)
73
class MalformedHunkHeader(PatchSyntax):
75
_fmt = "Malformed hunk header. %(desc)s\n%(line)r"
77
def __init__(self, desc, line):
82
def get_patch_names(iter_lines):
    """Read the two file-name header lines that open a patch.

    :param iter_lines: iterator over the lines of the patch, as bytes; it
        is advanced past every line that is consumed here
    :return: (orig_name, mod_name), i.e. whatever follows "--- " and
        "+++ " on the two header lines, with trailing newlines stripped
    :raise BinaryFiles: if the first line is a "Binary files ... differ"
        marker
    :raise MalformedPatchHeader: if either header line is missing, or the
        first line does not start with "--- "
    :raise PatchSyntax: if the second line does not start with "+++ "
    """
    # Each next() is guarded on its own so that running out of input is
    # reported as a malformed header. A bare StopIteration escaping from
    # here would silently end (or, under PEP 479, crash) a generator that
    # calls this function.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith(b"--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip(b"\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith(b"+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip(b"\n")
    return (orig_name, mod_name)
105
def parse_range(textrange):
106
"""Parse a patch range, handling the "1" special-case
108
:param textrange: The text to parse
110
:return: the position and range, as a tuple
113
tmp = textrange.split(b',')
124
def hunk_from_header(line):
126
matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
128
raise MalformedHunkHeader("Does not match format.", line)
130
(orig, mod) = matches.group(1).split(b" ")
131
except (ValueError, IndexError) as e:
132
raise MalformedHunkHeader(str(e), line)
133
if not orig.startswith(b'-') or not mod.startswith(b'+'):
134
raise MalformedHunkHeader("Positions don't start with + or -.", line)
136
(orig_pos, orig_range) = parse_range(orig[1:])
137
(mod_pos, mod_range) = parse_range(mod[1:])
138
except (ValueError, IndexError) as e:
139
raise MalformedHunkHeader(str(e), line)
140
if mod_range < 0 or orig_range < 0:
141
raise MalformedHunkHeader("Hunk range is negative", line)
142
tail = matches.group(3)
143
return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
146
class HunkLine(object):
148
def __init__(self, contents):
149
self.contents = contents
151
def get_str(self, leadchar):
152
if self.contents == b"\n" and leadchar == b" " and False:
154
if not self.contents.endswith(b'\n'):
155
terminator = b'\n' + NO_NL
158
return leadchar + self.contents + terminator
161
raise NotImplementedError
164
class ContextLine(HunkLine):
166
def __init__(self, contents):
167
HunkLine.__init__(self, contents)
170
return self.get_str(b" ")
173
class InsertLine(HunkLine):
174
def __init__(self, contents):
175
HunkLine.__init__(self, contents)
178
return self.get_str(b"+")
181
class RemoveLine(HunkLine):
182
def __init__(self, contents):
183
HunkLine.__init__(self, contents)
186
return self.get_str(b"-")
188
# Marker line emitted after a hunk line whose text has no terminating
# newline; HunkLine.get_str() appends it in that case.
NO_NL = b'\\ No newline at end of file\n'
189
__pychecker__="no-returnvalues"
191
def parse_line(line):
    """Build the hunk-line object that corresponds to one raw patch line.

    :param line: one line from the body of a hunk, as bytes
    :return: a ContextLine, InsertLine or RemoveLine; the one-character
        marker is dropped from its contents, except that a line beginning
        with a newline is kept whole as context
    :raise MalformedLine: if the line starts with none of the known markers
    """
    # A line that begins with a newline has no marker to strip: it is
    # taken as context and stored unchanged.
    if line.startswith(b"\n"):
        return ContextLine(line)
    marked_types = (
        (b" ", ContextLine),
        (b"+", InsertLine),
        (b"-", RemoveLine),
    )
    for marker, line_type in marked_types:
        if line.startswith(marker):
            return line_type(line[1:])
    raise MalformedLine("Unknown line type", line)
207
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
208
self.orig_pos = orig_pos
209
self.orig_range = orig_range
210
self.mod_pos = mod_pos
211
self.mod_range = mod_range
215
def get_header(self):
216
if self.tail is None:
219
tail_str = b' ' + self.tail
220
return b"@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
222
self.range_str(self.mod_pos,
226
def range_str(self, pos, range):
227
"""Return a file range, special-casing for 1-line files.
229
:param pos: The position in the file
231
:range: The range in the file
233
:return: a string in the format 1,4 except when range == pos == 1
238
return b"%i,%i" % (pos, range)
241
lines = [self.get_header()]
242
for line in self.lines:
243
lines.append(line.as_bytes())
244
return b"".join(lines)
248
def shift_to_mod(self, pos):
249
if pos < self.orig_pos-1:
251
elif pos > self.orig_pos+self.orig_range:
252
return self.mod_range - self.orig_range
254
return self.shift_to_mod_lines(pos)
256
def shift_to_mod_lines(self, pos):
257
position = self.orig_pos-1
259
for line in self.lines:
260
if isinstance(line, InsertLine):
262
elif isinstance(line, RemoveLine):
267
elif isinstance(line, ContextLine):
274
def iter_hunks(iter_lines, allow_dirty=False):
276
:arg iter_lines: iterable of lines to parse for hunks
277
:kwarg allow_dirty: If True, when we encounter something that is not
278
a hunk header when we're looking for one, assume the rest of the lines
279
are not part of the patch (comments or other junk). Default False
282
for line in iter_lines:
291
hunk = hunk_from_header(line)
292
except MalformedHunkHeader:
294
# If the line isn't a hunk header, then we've reached the end
295
# of this patch and there's "junk" at the end. Ignore the
296
# rest of this patch.
301
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
302
hunk_line = parse_line(next(iter_lines))
303
hunk.lines.append(hunk_line)
304
if isinstance(hunk_line, (RemoveLine, ContextLine)):
306
if isinstance(hunk_line, (InsertLine, ContextLine)):
312
class BinaryPatch(object):
314
def __init__(self, oldname, newname):
315
self.oldname = oldname
316
self.newname = newname
319
return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
322
class Patch(BinaryPatch):
324
def __init__(self, oldname, newname):
325
BinaryPatch.__init__(self, oldname, newname)
329
ret = self.get_header()
330
ret += b"".join([h.as_bytes() for h in self.hunks])
333
def get_header(self):
334
return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)
336
def stats_values(self):
337
"""Calculate the number of inserts and removes."""
340
for hunk in self.hunks:
341
for line in hunk.lines:
342
if isinstance(line, InsertLine):
344
elif isinstance(line, RemoveLine):
346
return (inserts, removes, len(self.hunks))
349
"""Return a string of patch statistics"""
350
return "%i inserts, %i removes in %i hunks" % \
353
def pos_in_mod(self, position):
355
for hunk in self.hunks:
356
shift = hunk.shift_to_mod(position)
362
def iter_inserted(self):
363
"""Iterates through inserted lines
365
:return: Pair of line number, line
366
:rtype: iterator of (int, InsertLine)
368
for hunk in self.hunks:
369
pos = hunk.mod_pos - 1;
370
for line in hunk.lines:
371
if isinstance(line, InsertLine):
374
if isinstance(line, ContextLine):
378
def parse_patch(iter_lines, allow_dirty=False):
380
:arg iter_lines: iterable of lines to parse
381
:kwarg allow_dirty: If True, allow the patch to have trailing junk.
384
iter_lines = iter_lines_handle_nl(iter_lines)
386
(orig_name, mod_name) = get_patch_names(iter_lines)
387
except BinaryFiles as e:
388
return BinaryPatch(e.orig_name, e.mod_name)
390
patch = Patch(orig_name, mod_name)
391
for hunk in iter_hunks(iter_lines, allow_dirty):
392
patch.hunks.append(hunk)
396
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
398
:arg iter_lines: iterable of lines to parse for patches
399
:kwarg allow_dirty: If True, allow comments and other non-patch text
400
before the first patch. Note that the algorithm here can only find
401
such text before any patches have been found. Comments after the
402
first patch are stripped away in iter_hunks() if it is also passed
403
allow_dirty=True. Default False.
405
### FIXME: Docstring is not quite true. We allow certain comments no
406
# matter what, if they start with '===', '***', or '#'. Someone should
407
# reexamine this logic and decide if we should include those in
408
# allow_dirty or restrict those to only being before the patch is found
409
# (as allow_dirty does).
410
regex = re.compile(binary_files_re)
416
for line in iter_lines:
417
if line.startswith(b'=== '):
418
if len(saved_lines) > 0:
419
if keep_dirty and len(dirty_head) > 0:
420
yield {'saved_lines': saved_lines,
421
'dirty_head': dirty_head}
426
dirty_head.append(line)
428
if line.startswith(b'*** '):
430
if line.startswith(b'#'):
433
if line.startswith(b'-') or line.startswith(b' '):
435
elif line.startswith(b'--- ') or regex.match(line):
436
if allow_dirty and beginning:
437
# Patches can have "junk" at the beginning
438
# Stripping junk from the end of patches is handled when we
441
elif len(saved_lines) > 0:
442
if keep_dirty and len(dirty_head) > 0:
443
yield {'saved_lines': saved_lines,
444
'dirty_head': dirty_head}
449
elif line.startswith(b'@@'):
450
hunk = hunk_from_header(line)
451
orig_range = hunk.orig_range
452
saved_lines.append(line)
453
if len(saved_lines) > 0:
454
if keep_dirty and len(dirty_head) > 0:
455
yield {'saved_lines': saved_lines,
456
'dirty_head': dirty_head}
461
def iter_lines_handle_nl(iter_lines):
463
Iterates through lines, ensuring that lines that originally had no
464
terminating \n are produced without one. This transformation may be
465
applied at any point up until hunk line parsing, and is safe to apply
469
for line in iter_lines:
471
if not last_line.endswith(b'\n'):
472
raise AssertionError()
473
last_line = last_line[:-1]
475
if last_line is not None:
478
if last_line is not None:
482
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
484
:arg iter_lines: iterable of lines to parse for patches
485
:kwarg allow_dirty: If True, allow text that's not part of the patch at
486
selected places. This includes comments before and after a patch
487
for instance. Default False.
488
:kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
491
for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
492
if 'dirty_head' in patch_lines:
493
yield ({'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
494
'dirty_head': patch_lines['dirty_head']})
496
yield parse_patch(patch_lines, allow_dirty)
499
def difference_index(atext, btext):
500
"""Find the index of the first character that differs between two texts
502
:param atext: The first text
504
:param btext: The second text
506
:return: The index, or None if there are no differences within the range
507
:rtype: int or NoneType
510
if len(btext) < length:
512
for i in range(length):
513
if atext[i] != btext[i]:
518
def iter_patched(orig_lines, patch_lines):
519
"""Iterate through a series of lines with a patch applied.
520
This handles a single file, and does exact, not fuzzy patching.
522
patch_lines = iter_lines_handle_nl(iter(patch_lines))
523
get_patch_names(patch_lines)
524
return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
527
def iter_patched_from_hunks(orig_lines, hunks):
528
"""Iterate through a series of lines with a patch applied.
529
This handles a single file, and does exact, not fuzzy patching.
531
:param orig_lines: The unpatched lines.
532
:param hunks: An iterable of Hunk instances.
536
if orig_lines is not None:
537
orig_lines = iter(orig_lines)
539
while line_no < hunk.orig_pos:
540
orig_line = next(orig_lines)
543
for hunk_line in hunk.lines:
544
seen_patch.append(str(hunk_line))
545
if isinstance(hunk_line, InsertLine):
546
yield hunk_line.contents
547
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
548
orig_line = next(orig_lines)
549
if orig_line != hunk_line.contents:
550
raise PatchConflict(line_no, orig_line, b"".join(seen_patch))
551
if isinstance(hunk_line, ContextLine):
554
if not isinstance(hunk_line, RemoveLine):
555
raise AssertionError(hunk_line)
557
if orig_lines is not None:
558
for line in orig_lines: