# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
# <aaron.bentley@utoronto.ca>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
from __future__ import absolute_import

import re
27
# Matches a "Binary files X and Y differ" line; the two groups capture the
# two file names.
binary_files_re = b'Binary files (.*) and (.*) differ\n'
class PatchSyntax(BzrError):
    """Base class for patch syntax errors."""
class BinaryFiles(BzrError):
    """Signal that a "Binary files ... differ" line was met in a patch header.

    :ivar orig_name: the first file name given to the constructor
    :ivar mod_name: the second file name given to the constructor
    """

    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        self.orig_name = orig_name
        self.mod_name = mod_name
class MalformedPatchHeader(PatchSyntax):
    """Error for a patch header line that cannot be parsed.

    :param desc: short description of what is wrong
    :param line: the offending line
    """

    # The placeholders name the attributes set in __init__; presumably
    # BzrError interpolates instance attributes into _fmt -- TODO confirm.
    _fmt = "Malformed patch header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
class MalformedLine(PatchSyntax):
    """Error for a hunk body line that cannot be parsed.

    :param desc: short description of what is wrong
    :param line: the offending line
    """

    # The placeholders name the attributes set in __init__; presumably
    # BzrError interpolates instance attributes into _fmt -- TODO confirm.
    _fmt = "Malformed line. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
class PatchConflict(BzrError):
    """Error for an original line that differs from what the patch expects.

    :param line_no: line number at which the mismatch was detected
    :param orig_line: the line found in the original text
    :param patch_line: the text the patch says should be there
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        self.orig_line = self._strip_newlines(orig_line)
        self.patch_line = self._strip_newlines(patch_line)

    @staticmethod
    def _strip_newlines(text):
        """Return *text* without trailing newlines, for bytes or str input."""
        # rstrip('\n') raises TypeError on bytes, and this module raises the
        # error with bytes arguments, so match the newline to the input type.
        newline = b'\n' if isinstance(text, bytes) else '\n'
        return text.rstrip(newline)
class MalformedHunkHeader(PatchSyntax):
    """Error for a hunk header (``@@ ... @@``) line that cannot be parsed.

    :param desc: short description of what is wrong
    :param line: the offending line
    """

    # The placeholders name the attributes set in __init__; presumably
    # BzrError interpolates instance attributes into _fmt -- TODO confirm.
    _fmt = "Malformed hunk header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
def get_patch_names(iter_lines):
    """Read the ``---`` / ``+++`` header pair from an iterator of lines.

    :param iter_lines: iterator of bytes lines; two lines are consumed on
        success
    :return: ``(orig_name, mod_name)``, each with the four-byte prefix and
        trailing newlines removed
    :raises BinaryFiles: if the first line matches ``binary_files_re``
    :raises MalformedPatchHeader: if the first line does not start with
        ``--- ``, or if either header line is missing
    :raises PatchSyntax: if the second line does not start with ``+++ ``
    """
    # Only next() can raise StopIteration, so each try covers just that call;
    # an exhausted iterator is reported as a malformed header.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith(b"--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip(b"\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith(b"+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip(b"\n")
    return (orig_name, mod_name)
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case.

    :param textrange: The text to parse, either ``b"pos,range"`` or a bare
        ``b"pos"``
    :type textrange: bytes
    :return: the position and range, as a tuple; the range is 1 when the
        text contains no comma
    :rtype: (int, int)
    :raises ValueError: if there is more than one comma or a field is not an
        integer
    """
    fields = textrange.split(b',')
    if len(fields) == 1:
        # A bare position stands for a one-line range.
        pos_text, length_text = fields[0], b"1"
    else:
        (pos_text, length_text) = fields
    return (int(pos_text), int(length_text))
def hunk_from_header(line):
    """Create a Hunk from a ``@@ -pos,range +pos,range @@ tail`` header line.

    :param line: the header line, as bytes, including its trailing newline
    :return: a Hunk built from the parsed positions, ranges and optional tail
    :raises MalformedHunkHeader: if the line does not match the header
        format, the positions lack their ``-``/``+`` prefixes, a number
        cannot be parsed, or a range is negative
    """
    matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(b" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith(b'-') or not mod.startswith(b'+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the text after the closing "@@ "; None when absent.
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
class HunkLine(object):
    """Base class for one line of a hunk body.

    :ivar contents: the line's bytes without the leading marker character
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Return the line as patch bytes, prefixed with *leadchar*.

        :param leadchar: the one-byte marker to put in front of the contents
        :return: ``leadchar + contents``; when the contents do not end with
            a newline, a newline and the ``NO_NL`` marker line are appended
        """
        if self.contents.endswith(b'\n'):
            terminator = b''
        else:
            terminator = b'\n' + NO_NL
        return leadchar + self.contents + terminator

    def as_bytes(self):
        """Return the patch bytes for this line; subclasses must override."""
        raise NotImplementedError
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the contents prefixed with ``b" "``."""
        return self.get_str(b" ")
class InsertLine(HunkLine):
    """A hunk line rendered with a leading ``+``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the contents prefixed with ``b"+"``."""
        return self.get_str(b"+")
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading ``-``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def as_bytes(self):
        """Return the contents prefixed with ``b"-"``."""
        return self.get_str(b"-")
NO_NL = b'\\ No newline at end of file\n'
188
__pychecker__="no-returnvalues"
190
def parse_line(line):
    """Turn one raw hunk body line into a HunkLine subclass instance.

    :param line: a bytes line from a hunk body
    :return: a ContextLine for a line starting with a newline (contents
        kept whole) or a space, an InsertLine for ``+``, a RemoveLine for
        ``-``; except in the newline case the marker byte is dropped
    :raises MalformedLine: if the line starts with anything else
    """
    if line.startswith(b"\n"):
        # A bare newline is treated as a context line and kept intact.
        return ContextLine(line)
    if line.startswith(b" "):
        return ContextLine(line[1:])
    if line.startswith(b"+"):
        return InsertLine(line[1:])
    if line.startswith(b"-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)
class Hunk:
    """One hunk of a patch: a header plus a list of hunk lines.

    :ivar orig_pos: position given for the original text
    :ivar orig_range: range given for the original text
    :ivar mod_pos: position given for the modified text
    :ivar mod_range: range given for the modified text
    :ivar tail: bytes following the closing ``@@`` of the header, or None
    :ivar lines: list of hunk line objects; starts empty
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the ``@@ -... +... @@`` header line as bytes."""
        if self.tail is None:
            tail_str = b''
        else:
            tail_str = b' ' + self.tail
        return b"@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing one-line ranges.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: bytes in the format ``1,4``, or just the position when the
            range is 1
        """
        if range == 1:
            return b"%i" % pos
        return b"%i,%i" % (pos, range)

    def as_bytes(self):
        """Return the header followed by every line's ``as_bytes()``."""
        lines = [self.get_header()]
        for line in self.lines:
            lines.append(line.as_bytes())
        return b"".join(lines)

    def shift_to_mod(self, pos):
        """Return the line shift this hunk applies to position *pos*.

        :return: 0 when *pos* is below ``orig_pos - 1``, the difference
            between the two ranges when *pos* is above
            ``orig_pos + orig_range``, otherwise the result of
            shift_to_mod_lines
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk lines to compute the shift at *pos*.

        :return: the accumulated shift, or None when the line at *pos* is
            removed by this hunk
        """
        position = self.orig_pos - 1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
def iter_hunks(iter_lines, allow_dirty=False):
    """Yield the Hunk objects parsed from an iterator of patch lines.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk). Default False
    :raises MalformedHunkHeader: for a bad header when allow_dirty is False
    """
    hunk = None
    for line in iter_lines:
        if line == b"\n":
            # A blank line between hunks closes the hunk in progress.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end. Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        # Consume body lines until both sides of the header's ranges are met.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            hunk_line = parse_line(next(iter_lines))
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
class BinaryPatch(object):
    """A patch entry that only records that two binary files differ.

    :ivar oldname: name of the old file, as bytes
    :ivar newname: name of the new file, as bytes
    """

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def as_bytes(self):
        """Return the ``Binary files ... differ`` line for this entry."""
        return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
class Patch(BinaryPatch):
    """A textual patch for one file: two names plus a list of hunks.

    :ivar hunks: list of hunk objects; starts empty
    """

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def as_bytes(self):
        """Return the header followed by every hunk's ``as_bytes()``."""
        ret = self.get_header()
        ret += b"".join([h.as_bytes() for h in self.hunks])
        return ret

    def get_header(self):
        """Return the ``---`` / ``+++`` header lines as bytes."""
        return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: tuple ``(inserts, removes, number_of_hunks)``
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % \
            self.stats_values()

    def pos_in_mod(self, position):
        """Map *position* through every hunk's shift.

        :return: the shifted position, or None as soon as a hunk's
            ``shift_to_mod`` returns None
        """
        newpos = position
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
def parse_patch(iter_lines, allow_dirty=False):
    """Parse the lines of a single-file patch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch when the header is a binary-files line, otherwise
        a Patch holding every parsed hunk
    """
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
    """Split a stream of lines into the line lists of individual file patches.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch. Note that the algorithm here can only find
        such text before any patches have been found. Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True. Default False.
    :kwarg keep_dirty: If True, a patch that has collected ``=== `` lines is
        yielded as a dict with keys ``'saved_lines'`` and ``'dirty_head'``
        instead of a plain list of lines. Default False.
    """
    # FIXME: Docstring is not quite true. We allow certain comments no
    # matter what, if they start with '===', '***', or '#'. Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    dirty_head = []
    orig_range = 0
    beginning = True

    for line in iter_lines:
        if line.startswith(b'=== '):
            if len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
                saved_lines = []
            dirty_head.append(line)
            continue
        if line.startswith(b'*** '):
            continue
        if line.startswith(b'#'):
            continue
        if orig_range > 0:
            # Inside a hunk: count down the original-side lines so that body
            # lines starting with '--- ' are not taken for a new header.
            if line.startswith((b'-', b' ')):
                orig_range -= 1
        elif line.startswith(b'--- ') or regex.match(line):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
            saved_lines = []
        elif line.startswith(b'@@'):
            hunk = hunk_from_header(line)
            orig_range = hunk.orig_range
        saved_lines.append(line)
    if len(saved_lines) > 0:
        if keep_dirty and len(dirty_head) > 0:
            yield {'saved_lines': saved_lines,
                   'dirty_head': dirty_head}
        else:
            yield saved_lines
def iter_lines_handle_nl(iter_lines):
    """Apply "no newline at end of file" markers to the preceding line.

    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one. This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: iterable of bytes lines
    :return: generator of lines with every ``NO_NL`` marker line removed and
        the line before it stripped of its final newline
    :raises AssertionError: if the line before a marker does not end with a
        newline
    """
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if not last_line.endswith(b'\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            # The marker itself is never yielded.
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
    """Yield a parsed patch for every file patch found in the lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places. This includes comments before and after a patch
        for instance. Default False.
    :kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
        Default False.
    """
    for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
        if 'dirty_head' in patch_lines:
            # iter_file_patch yielded a dict; keep its head next to the patch.
            yield ({'patch': parse_patch(patch_lines['saved_lines'],
                                         allow_dirty),
                    'dirty_head': patch_lines['dirty_head']})
        else:
            yield parse_patch(patch_lines, allow_dirty)
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts.

    Only the common prefix length (the shorter of the two lengths) is
    compared.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip stops at the shorter input, which is the range being compared.
    for index, (a_item, b_item) in enumerate(zip(atext, btext)):
        if a_item != b_item:
            return index
    return None
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: iterable of the patch's lines, starting with its
        two header lines
    :return: the iterator produced by iter_patched_from_hunks
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # Called only to consume and validate the header; the names are unused.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
def iter_patched_from_hunks(orig_lines, hunks):
527
"""Iterate through a series of lines with a patch applied.
528
This handles a single file, and does exact, not fuzzy patching.
530
:param orig_lines: The unpatched lines.
531
:param hunks: An iterable of Hunk instances.
535
if orig_lines is not None:
536
orig_lines = iter(orig_lines)
538
while line_no < hunk.orig_pos:
539
orig_line = next(orig_lines)
542
for hunk_line in hunk.lines:
543
seen_patch.append(str(hunk_line))
544
if isinstance(hunk_line, InsertLine):
545
yield hunk_line.contents
546
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
547
orig_line = next(orig_lines)
548
if orig_line != hunk_line.contents:
549
raise PatchConflict(line_no, orig_line, b"".join(seen_patch))
550
if isinstance(hunk_line, ContextLine):
553
if not isinstance(hunk_line, RemoveLine):
554
raise AssertionError(hunk_line)
556
if orig_lines is not None:
557
for line in orig_lines: