1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
27
# Regex for the one-line notice that stands in for a patch body when the two
# files are binary.  Group 1 is the original file name, group 2 the modified
# one (that is the order get_patch_names() passes them to BinaryFiles).
binary_files_re = 'Binary files (.*) and (.*) differ\n'
30
class PatchSyntax(BzrError):
    """Base class for patch syntax errors."""
34
class BinaryFiles(BzrError):
    """Signals that a "Binary files ... differ" section was encountered.

    :ivar orig_name: name of the original file, as given in the notice
    :ivar mod_name: name of the modified file, as given in the notice
    """

    # NOTE(review): presumably rendered into the message by BzrError -- confirm.
    _fmt = 'Binary files section encountered.'

    def __init__(self, orig_name, mod_name):
        self.orig_name = orig_name
        self.mod_name = mod_name
43
class MalformedPatchHeader(PatchSyntax):
    """A patch's ``---``/``+++`` header could not be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    # NOTE(review): presumably BzrError fills the %(...) placeholders from the
    # instance attributes of the same name -- confirm against BzrError.
    _fmt = "Malformed patch header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Store exactly the two values that _fmt refers to.
        self.desc = desc
        self.line = line
52
class MalformedLine(PatchSyntax):
    """A line inside a hunk body has an unrecognised leading marker.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    # NOTE(review): presumably BzrError fills the %(...) placeholders from the
    # instance attributes of the same name -- confirm against BzrError.
    _fmt = "Malformed line. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Store exactly the two values that _fmt refers to.
        self.desc = desc
        self.line = line
61
class PatchConflict(BzrError):
    """The original text does not match what the patch expects.

    :ivar line_no: line number at which the mismatch was found
    :ivar orig_line: the original text, with trailing newlines stripped
    :ivar patch_line: the text the patch expected, trailing newlines stripped
    """

    _fmt = ('Text contents mismatch at line %(line_no)d. Original has '
            '"%(orig_line)s", but patch says it should be "%(patch_line)s"')

    def __init__(self, line_no, orig_line, patch_line):
        self.line_no = line_no
        # Trailing newlines are dropped so the quoted text stays on one line.
        self.orig_line = orig_line.rstrip('\n')
        self.patch_line = patch_line.rstrip('\n')
72
class MalformedHunkHeader(PatchSyntax):
    """A hunk's ``@@ ... @@`` header line could not be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line
    """

    # NOTE(review): presumably BzrError fills the %(...) placeholders from the
    # instance attributes of the same name -- confirm against BzrError.
    _fmt = "Malformed hunk header. %(desc)s\n%(line)r"

    def __init__(self, desc, line):
        # Store exactly the two values that _fmt refers to.
        self.desc = desc
        self.line = line
81
def get_patch_names(iter_lines):
    """Consume the two header lines of a patch and return the file names.

    :param iter_lines: an iterator (it is advanced with next()) positioned
        at the ``--- `` line of a patch
    :return: the tuple (orig_name, mod_name)
    :raises BinaryFiles: if the first line is a "Binary files ... differ"
        notice
    :raises MalformedPatchHeader: if the first line does not start with
        ``--- ``, or if the input ends before either header line
    :raises PatchSyntax: if the second line does not start with ``+++ ``
    """
    # The first read has to be guarded too: otherwise an empty input leaks a
    # bare StopIteration to the caller instead of a patch error.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
104
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A range is written either as ``pos,length`` or as a bare ``pos``; in the
    bare form the length is taken to be 1.

    :param textrange: The text to parse
    :return: the position and range, as a tuple of two ints
    :raises ValueError: if there are more than two comma-separated fields or
        a field is not an integer
    """
    tmp = textrange.split(',')
    if len(tmp) == 1:
        # Bare position: the length defaults to a single line.
        pos = tmp[0]
        length = "1"
    else:
        (pos, length) = tmp
    return (int(pos), int(length))
123
def hunk_from_header(line):
    """Create a Hunk (with no body lines yet) from an ``@@ ... @@`` header.

    :param line: the header line, including its trailing newline
    :return: a Hunk carrying the parsed positions, ranges and optional tail
    :raises MalformedHunkHeader: if the line does not match the header
        format, the positions lack their ``-``/``+`` markers, a range cannot
        be parsed, or a range is negative
    """
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        # Exactly two space-separated fields are expected: "-a,b" and "+c,d".
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the optional text after the closing "@@" (None if absent).
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
146
class HunkLine(object):
    """One line of a hunk body, stored without its leading marker character.

    :ivar contents: the line text after the marker
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Render the line as patch text.

        :param leadchar: the marker character to put in front of the contents
        :return: ``leadchar + contents``; if the contents lack a trailing
            newline, a newline and the NO_NL marker line are appended
        """
        if self.contents.endswith('\n'):
            terminator = ''
        else:
            terminator = '\n' + NO_NL
        return leadchar + self.contents + terminator
159
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str(" ")
167
class InsertLine(HunkLine):
    """A hunk line rendered with a leading ``+``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str("+")
175
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading ``-``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str("-")
182
# Marker line that follows a patch line whose text has no trailing newline.
NO_NL = '\\ No newline at end of file\n'
# NOTE(review): looks like an option string for the pychecker tool -- confirm.
__pychecker__="no-returnvalues"
185
def parse_line(line):
    """Turn one line of a hunk body into the matching HunkLine subclass.

    :param line: a raw line from the hunk body
    :return: a ContextLine, InsertLine or RemoveLine; the marker character is
        stripped from the contents, except that a line starting with a
        newline is kept whole as a ContextLine
    :raises MalformedLine: if the line starts with none of the known markers
    """
    if line.startswith("\n"):
        # No marker to strip here; the whole line becomes the contents.
        return ContextLine(line)
    if line.startswith(" "):
        return ContextLine(line[1:])
    if line.startswith("+"):
        return InsertLine(line[1:])
    if line.startswith("-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)
200
class Hunk(object):
    """One hunk of a patch: its header fields plus its parsed body lines.

    :ivar orig_pos: start position in the original text
    :ivar orig_range: number of lines covered in the original text
    :ivar mod_pos: start position in the modified text
    :ivar mod_range: number of lines covered in the modified text
    :ivar tail: optional text following the closing ``@@`` of the header
    :ivar lines: the HunkLine objects making up the body
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the ``@@ -a,b +c,d @@ tail`` header line for this hunk."""
        if self.tail is None:
            tail_str = ''
        else:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing for 1-line ranges.

        :param pos: The position in the file
        :param range: The range in the file
        :return: a string in the format 1,4, or just the position when the
            range is 1 (the form parse_range() reads back as a range of 1)
        """
        if range == 1:
            return "%i" % pos
        return "%i,%i" % (pos, range)

    def __str__(self):
        """Return the header followed by every body line, as patch text."""
        parts = [self.get_header()]
        parts.extend(str(line) for line in self.lines)
        return "".join(parts)

    def shift_to_mod(self, pos):
        """Return how far this hunk shifts original position ``pos``.

        :param pos: a position in the original text
        :return: 0 if ``pos`` lies before the hunk, the hunk's net size
            change if it lies after it, otherwise the result of
            shift_to_mod_lines()
        """
        if pos < self.orig_pos-1:
            return 0
        if pos > self.orig_pos+self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the body lines to compute the shift for a position in the hunk.

        :param pos: a position in the original text
        :return: the accumulated shift, or None if the line at ``pos`` is
            removed by this hunk
        """
        position = self.orig_pos-1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    # The requested line does not survive the patch.
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
265
def iter_hunks(iter_lines, allow_dirty=False):
    '''Yield the hunks found in a patch body.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk). Default False
    '''
    hunk = None
    for line in iter_lines:
        if line == "\n":
            # A blank line between hunks: flush the pending hunk, if any.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end. Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        # Pull body lines from the same iterator until both sides of the hunk
        # have as many lines as the header announced.
        # NOTE(review): if the input ends mid-hunk, next() raises
        # StopIteration inside this generator -- confirm the intended handling.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            hunk_line = parse_line(next(iter_lines))
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
303
class BinaryPatch(object):
    """A patch entry that only records that two binary files differ.

    :ivar oldname: name of the original file
    :ivar newname: name of the modified file
    """

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def __str__(self):
        return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
312
class Patch(BinaryPatch):
    """A text patch for one file: the two file names plus a list of hunks.

    :ivar hunks: the Hunk objects of this patch, in order
    """

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def __str__(self):
        """Return the patch as text: the header followed by every hunk."""
        return self.get_header() + "".join([str(h) for h in self.hunks])

    def get_header(self):
        """Return the ``---``/``+++`` header lines."""
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: the tuple (inserts, removes, number of hunks)
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % \
            self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        :param position: a position in the original text
        :return: the shifted position, or None if any hunk reports (via
            shift_to_mod) that the line no longer exists
        """
        newpos = position
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
367
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single-file patch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch if the header is a "Binary files" notice,
        otherwise a Patch holding every hunk found
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
385
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Split a multi-file patch into the groups of lines for each file.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch. Note that the algorithm here can only find
        such text before any patches have been found. Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True. Default False.
    :kwarg keep_dirty: If True, a group that was preceded by ``=== `` lines
        is yielded as a dict with keys 'saved_lines' and 'dirty_head' rather
        than as a plain list of lines. Default False.
    '''
    ### FIXME: Docstring is not quite true. We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    dirty_head = []
    # Lines of the original file still expected in the current hunk; while it
    # is positive, a line starting with '--- ' is hunk content, not a header.
    orig_range = 0
    beginning = True

    for line in iter_lines:
        if line.startswith('=== '):
            if len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
                saved_lines = []
            dirty_head.append(line)
            continue
        if line.startswith('*** '):
            continue
        if line.startswith('#'):
            continue
        elif orig_range > 0:
            if line.startswith('-') or line.startswith(' '):
                orig_range -= 1
        elif line.startswith('--- ') or regex.match(line):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                if keep_dirty and len(dirty_head) > 0:
                    yield {'saved_lines': saved_lines,
                           'dirty_head': dirty_head}
                    dirty_head = []
                else:
                    yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            hunk = hunk_from_header(line)
            orig_range = hunk.orig_range
        saved_lines.append(line)
    if len(saved_lines) > 0:
        if keep_dirty and len(dirty_head) > 0:
            yield {'saved_lines': saved_lines,
                   'dirty_head': dirty_head}
        else:
            yield saved_lines
450
def iter_lines_handle_nl(iter_lines):
    """Fold "no newline at end of file" markers into the preceding line.

    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one. This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: iterable of patch lines
    :return: generator of lines; each NO_NL marker line is dropped and the
        line before it is yielded with its final newline removed
    :raises AssertionError: if a NO_NL marker has no preceding line, or the
        preceding line does not end with a newline
    """
    # Each line is held back one step so a following marker can still trim it.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            # A marker with nothing before it used to fail with an
            # AttributeError on None; report it like any other bad marker.
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
471
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
    '''Parse every per-file patch found in a stream of lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places. This includes comments before and after a patch
        for instance. Default False.
    :kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
        Default False.
    :return: a list with one entry per group from iter_file_patch(): the
        parse_patch() result, or a dict with keys 'patch' and 'dirty_head'
        when the group carries a dirty head
    '''
    patches = []
    for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
        if 'dirty_head' in patch_lines:
            patches.append({
                'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
                'dirty_head': patch_lines['dirty_head']})
        else:
            patches.append(parse_patch(patch_lines, allow_dirty))
    return patches
491
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the overlapping prefix (the length of the shorter text) is compared.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the shorter text, which bounds the compared range.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
510
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: the unpatched lines
    :param patch_lines: iterable of the lines of a single-file patch,
        starting with its ``---``/``+++`` header
    :return: whatever iter_patched_from_hunks() returns for the parsed hunks
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # Consume the two header lines; the names themselves are not needed here.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
519
def iter_patched_from_hunks(orig_lines, hunks):
520
"""Iterate through a series of lines with a patch applied.
521
This handles a single file, and does exact, not fuzzy patching.
523
:param orig_lines: The unpatched lines.
524
:param hunks: An iterable of Hunk instances.
528
if orig_lines is not None:
529
orig_lines = iter(orig_lines)
531
while line_no < hunk.orig_pos:
532
orig_line = next(orig_lines)
535
for hunk_line in hunk.lines:
536
seen_patch.append(str(hunk_line))
537
if isinstance(hunk_line, InsertLine):
538
yield hunk_line.contents
539
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
540
orig_line = next(orig_lines)
541
if orig_line != hunk_line.contents:
542
raise PatchConflict(line_no, orig_line, "".join(seen_patch))
543
if isinstance(hunk_line, ContextLine):
546
if not isinstance(hunk_line, RemoveLine):
547
raise AssertionError(hunk_line)
549
if orig_lines is not None:
550
for line in orig_lines: