14
14
# You should have received a copy of the GNU General Public License
15
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
binary_files_re = b'Binary files (.*) and (.*) differ\n'
29
class PatchSyntax(BzrError):
30
"""Base class for patch syntax errors."""
33
class BinaryFiles(BzrError):
35
_fmt = 'Binary files section encountered.'
37
def __init__(self, orig_name, mod_name):
38
self.orig_name = orig_name
39
self.mod_name = mod_name
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
class PatchSyntax(Exception):
20
def __init__(self, msg):
21
Exception.__init__(self, msg)
42
24
class MalformedPatchHeader(PatchSyntax):
44
_fmt = "Malformed patch header. %(desc)s\n%(line)r"
46
def __init__(self, desc, line):
25
def __init__(self, desc, line):
28
msg = "Malformed patch header. %s\n%r" % (self.desc, self.line)
29
PatchSyntax.__init__(self, msg)
32
class MalformedHunkHeader(PatchSyntax):
33
def __init__(self, desc, line):
36
msg = "Malformed hunk header. %s\n%r" % (self.desc, self.line)
37
PatchSyntax.__init__(self, msg)
51
40
class MalformedLine(PatchSyntax):
53
_fmt = "Malformed line. %(desc)s\n%(line)r"
55
41
def __init__(self, desc, line):
60
class PatchConflict(BzrError):
62
_fmt = ('Text contents mismatch at line %(line_no)d. Original has '
63
'"%(orig_line)s", but patch says it should be "%(patch_line)s"')
44
msg = "Malformed line. %s\n%s" % (self.desc, self.line)
45
PatchSyntax.__init__(self, msg)
48
class PatchConflict(Exception):
65
49
def __init__(self, line_no, orig_line, patch_line):
66
self.line_no = line_no
67
self.orig_line = orig_line.rstrip('\n')
68
self.patch_line = patch_line.rstrip('\n')
71
class MalformedHunkHeader(PatchSyntax):
73
_fmt = "Malformed hunk header. %(desc)s\n%(line)r"
75
def __init__(self, desc, line):
50
orig = orig_line.rstrip('\n')
51
patch = str(patch_line).rstrip('\n')
52
msg = 'Text contents mismatch at line %d. Original has "%s",'\
53
' but patch says it should be "%s"' % (line_no, orig, patch)
54
Exception.__init__(self, msg)
80
57
def get_patch_names(iter_lines):
81
line = next(iter_lines)
83
match = re.match(binary_files_re, line)
85
raise BinaryFiles(match.group(1), match.group(2))
86
if not line.startswith(b"--- "):
59
line = iter_lines.next()
60
if not line.startswith("--- "):
87
61
raise MalformedPatchHeader("No orig name", line)
89
orig_name = line[4:].rstrip(b"\n")
63
orig_name = line[4:].rstrip("\n")
90
64
except StopIteration:
91
65
raise MalformedPatchHeader("No orig line", "")
93
line = next(iter_lines)
94
if not line.startswith(b"+++ "):
67
line = iter_lines.next()
68
if not line.startswith("+++ "):
95
69
raise PatchSyntax("No mod name")
97
mod_name = line[4:].rstrip(b"\n")
71
mod_name = line[4:].rstrip("\n")
98
72
except StopIteration:
99
73
raise MalformedPatchHeader("No mod line", "")
100
74
return (orig_name, mod_name)
108
82
:return: the position and range, as a tuple
109
83
:rtype: (int, int)
111
tmp = textrange.split(b',')
85
tmp = textrange.split(',')
116
90
(pos, range) = tmp
118
92
range = int(range)
119
93
return (pos, range)
122
96
def hunk_from_header(line):
124
matches = re.match(br'\@\@ ([^@]*) \@\@( (.*))?\n', line)
126
raise MalformedHunkHeader("Does not match format.", line)
97
if not line.startswith("@@") or not line.endswith("@@\n") \
99
raise MalformedHunkHeader("Does not start and end with @@.", line)
128
(orig, mod) = matches.group(1).split(b" ")
129
except (ValueError, IndexError) as e:
101
(orig, mod) = line[3:-4].split(" ")
130
103
raise MalformedHunkHeader(str(e), line)
131
if not orig.startswith(b'-') or not mod.startswith(b'+'):
104
if not orig.startswith('-') or not mod.startswith('+'):
132
105
raise MalformedHunkHeader("Positions don't start with + or -.", line)
134
107
(orig_pos, orig_range) = parse_range(orig[1:])
135
108
(mod_pos, mod_range) = parse_range(mod[1:])
136
except (ValueError, IndexError) as e:
137
110
raise MalformedHunkHeader(str(e), line)
138
111
if mod_range < 0 or orig_range < 0:
139
112
raise MalformedHunkHeader("Hunk range is negative", line)
140
tail = matches.group(3)
141
return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
144
class HunkLine(object):
113
return Hunk(orig_pos, orig_range, mod_pos, mod_range)
146
117
def __init__(self, contents):
147
118
self.contents = contents
149
120
def get_str(self, leadchar):
150
if self.contents == b"\n" and leadchar == b" " and False:
152
if not self.contents.endswith(b'\n'):
153
terminator = b'\n' + NO_NL
121
if self.contents == "\n" and leadchar == " " and False:
123
if not self.contents.endswith('\n'):
124
terminator = '\n' + NO_NL
156
127
return leadchar + self.contents + terminator
159
raise NotImplementedError
162
130
class ContextLine(HunkLine):
164
131
def __init__(self, contents):
165
132
HunkLine.__init__(self, contents)
168
return self.get_str(b" ")
135
return self.get_str(" ")
171
138
class InsertLine(HunkLine):
172
139
def __init__(self, contents):
173
140
HunkLine.__init__(self, contents)
176
return self.get_str(b"+")
143
return self.get_str("+")
179
146
class RemoveLine(HunkLine):
180
147
def __init__(self, contents):
181
148
HunkLine.__init__(self, contents)
184
return self.get_str(b"-")
187
NO_NL = b'\\ No newline at end of file\n'
188
__pychecker__ = "no-returnvalues"
151
return self.get_str("-")
153
NO_NL = '\\ No newline at end of file\n'
154
__pychecker__="no-returnvalues"
191
156
def parse_line(line):
192
if line.startswith(b"\n"):
157
if line.startswith("\n"):
193
158
return ContextLine(line)
194
elif line.startswith(b" "):
159
elif line.startswith(" "):
195
160
return ContextLine(line[1:])
196
elif line.startswith(b"+"):
161
elif line.startswith("+"):
197
162
return InsertLine(line[1:])
198
elif line.startswith(b"-"):
163
elif line.startswith("-"):
199
164
return RemoveLine(line[1:])
201
168
raise MalformedLine("Unknown line type", line)
209
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
173
def __init__(self, orig_pos, orig_range, mod_pos, mod_range):
210
174
self.orig_pos = orig_pos
211
175
self.orig_range = orig_range
212
176
self.mod_pos = mod_pos
213
177
self.mod_range = mod_range
217
180
def get_header(self):
218
if self.tail is None:
221
tail_str = b' ' + self.tail
222
return b"@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
224
self.range_str(self.mod_pos,
181
return "@@ -%s +%s @@\n" % (self.range_str(self.orig_pos,
183
self.range_str(self.mod_pos,
228
186
def range_str(self, pos, range):
229
187
"""Return a file range, special-casing for 1-line files.
235
193
:return: a string in the format 1,4 except when range == pos == 1
240
return b"%i,%i" % (pos, range)
198
return "%i,%i" % (pos, range)
243
201
lines = [self.get_header()]
244
202
for line in self.lines:
245
lines.append(line.as_bytes())
246
return b"".join(lines)
203
lines.append(str(line))
204
return "".join(lines)
250
206
def shift_to_mod(self, pos):
251
if pos < self.orig_pos - 1:
207
if pos < self.orig_pos-1:
253
elif pos > self.orig_pos + self.orig_range:
209
elif pos > self.orig_pos+self.orig_range:
254
210
return self.mod_range - self.orig_range
256
212
return self.shift_to_mod_lines(pos)
258
214
def shift_to_mod_lines(self, pos):
259
position = self.orig_pos - 1
215
assert (pos >= self.orig_pos-1 and pos <= self.orig_pos+self.orig_range)
216
position = self.orig_pos-1
261
218
for line in self.lines:
262
219
if isinstance(line, InsertLine):
276
def iter_hunks(iter_lines, allow_dirty=False):
278
:arg iter_lines: iterable of lines to parse for hunks
279
:kwarg allow_dirty: If True, when we encounter something that is not
280
a hunk header when we're looking for one, assume the rest of the lines
281
are not part of the patch (comments or other junk). Default False
233
def iter_hunks(iter_lines):
284
235
for line in iter_lines:
286
237
if hunk is not None:
290
241
if hunk is not None:
293
hunk = hunk_from_header(line)
294
except MalformedHunkHeader:
296
# If the line isn't a hunk header, then we've reached the end
297
# of this patch and there's "junk" at the end. Ignore the
298
# rest of this patch.
243
hunk = hunk_from_header(line)
303
246
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
304
hunk_line = parse_line(next(iter_lines))
247
hunk_line = parse_line(iter_lines.next())
305
248
hunk.lines.append(hunk_line)
306
249
if isinstance(hunk_line, (RemoveLine, ContextLine)):
314
class BinaryPatch(object):
316
258
def __init__(self, oldname, newname):
317
259
self.oldname = oldname
318
260
self.newname = newname
321
return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
324
class Patch(BinaryPatch):
326
def __init__(self, oldname, newname):
327
BinaryPatch.__init__(self, oldname, newname)
331
ret = self.get_header()
332
ret += b"".join([h.as_bytes() for h in self.hunks])
264
ret = self.get_header()
265
ret += "".join([str(h) for h in self.hunks])
335
268
def get_header(self):
336
return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)
269
return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
338
def stats_values(self):
339
"""Calculate the number of inserts and removes."""
272
"""Return a string of patch statistics"""
342
275
for hunk in self.hunks:
343
276
for line in hunk.lines:
344
277
if isinstance(line, InsertLine):
346
279
elif isinstance(line, RemoveLine):
348
return (inserts, removes, len(self.hunks))
351
"""Return a string of patch statistics"""
352
281
return "%i inserts, %i removes in %i hunks" % \
282
(inserts, removes, len(self.hunks))
355
284
def pos_in_mod(self, position):
356
285
newpos = position
380
def parse_patch(iter_lines, allow_dirty=False):
382
:arg iter_lines: iterable of lines to parse
383
:kwarg allow_dirty: If True, allow the patch to have trailing junk.
386
iter_lines = iter_lines_handle_nl(iter_lines)
388
(orig_name, mod_name) = get_patch_names(iter_lines)
389
except BinaryFiles as e:
390
return BinaryPatch(e.orig_name, e.mod_name)
392
patch = Patch(orig_name, mod_name)
393
for hunk in iter_hunks(iter_lines, allow_dirty):
394
patch.hunks.append(hunk)
398
def iter_file_patch(iter_lines, allow_dirty=False, keep_dirty=False):
400
:arg iter_lines: iterable of lines to parse for patches
401
:kwarg allow_dirty: If True, allow comments and other non-patch text
402
before the first patch. Note that the algorithm here can only find
403
such text before any patches have been found. Comments after the
404
first patch are stripped away in iter_hunks() if it is also passed
405
allow_dirty=True. Default False.
407
# FIXME: Docstring is not quite true. We allow certain comments no
408
# matter what, if they start with '===', '***', or '#'. Someone should
409
# reexamine this logic and decide if we should include those in
410
# allow_dirty or restrict those to only being before the patch is found
411
# (as allow_dirty does).
412
regex = re.compile(binary_files_re)
309
def parse_patch(iter_lines):
310
(orig_name, mod_name) = get_patch_names(iter_lines)
311
patch = Patch(orig_name, mod_name)
312
for hunk in iter_hunks(iter_lines):
313
patch.hunks.append(hunk)
317
def iter_file_patch(iter_lines):
418
320
for line in iter_lines:
419
if line.startswith(b'=== '):
420
if len(saved_lines) > 0:
421
if keep_dirty and len(dirty_head) > 0:
422
yield {'saved_lines': saved_lines,
423
'dirty_head': dirty_head}
428
dirty_head.append(line)
430
if line.startswith(b'*** '):
432
if line.startswith(b'#'):
321
if line.startswith('=== ') or line.startswith('*** '):
323
if line.startswith('#'):
434
325
elif orig_range > 0:
435
if line.startswith(b'-') or line.startswith(b' '):
326
if line.startswith('-') or line.startswith(' '):
437
elif line.startswith(b'--- ') or regex.match(line):
438
if allow_dirty and beginning:
439
# Patches can have "junk" at the beginning
440
# Stripping junk from the end of patches is handled when we
443
elif len(saved_lines) > 0:
444
if keep_dirty and len(dirty_head) > 0:
445
yield {'saved_lines': saved_lines,
446
'dirty_head': dirty_head}
328
elif line.startswith('--- '):
329
if len(saved_lines) > 0:
451
elif line.startswith(b'@@'):
332
elif line.startswith('@@'):
452
333
hunk = hunk_from_header(line)
453
334
orig_range = hunk.orig_range
454
335
saved_lines.append(line)
455
336
if len(saved_lines) > 0:
456
if keep_dirty and len(dirty_head) > 0:
457
yield {'saved_lines': saved_lines,
458
'dirty_head': dirty_head}
463
340
def iter_lines_handle_nl(iter_lines):
484
def parse_patches(iter_lines, allow_dirty=False, keep_dirty=False):
486
:arg iter_lines: iterable of lines to parse for patches
487
:kwarg allow_dirty: If True, allow text that's not part of the patch at
488
selected places. This includes comments before and after a patch
489
for instance. Default False.
490
:kwarg keep_dirty: If True, returns a dict of patches with dirty headers.
493
for patch_lines in iter_file_patch(iter_lines, allow_dirty, keep_dirty):
494
if 'dirty_head' in patch_lines:
495
yield ({'patch': parse_patch(patch_lines['saved_lines'], allow_dirty),
496
'dirty_head': patch_lines['dirty_head']})
498
yield parse_patch(patch_lines, allow_dirty)
360
def parse_patches(iter_lines):
361
iter_lines = iter_lines_handle_nl(iter_lines)
362
return [parse_patch(f.__iter__()) for f in iter_file_patch(iter_lines)]
501
365
def difference_index(atext, btext):
521
385
"""Iterate through a series of lines with a patch applied.
522
386
This handles a single file, and does exact, not fuzzy patching.
524
patch_lines = iter_lines_handle_nl(iter(patch_lines))
388
if orig_lines is not None:
389
orig_lines = orig_lines.__iter__()
391
patch_lines = iter_lines_handle_nl(patch_lines.__iter__())
525
392
get_patch_names(patch_lines)
526
return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
529
def iter_patched_from_hunks(orig_lines, hunks):
530
"""Iterate through a series of lines with a patch applied.
531
This handles a single file, and does exact, not fuzzy patching.
533
:param orig_lines: The unpatched lines.
534
:param hunks: An iterable of Hunk instances.
538
if orig_lines is not None:
539
orig_lines = iter(orig_lines)
394
for hunk in iter_hunks(patch_lines):
541
395
while line_no < hunk.orig_pos:
542
orig_line = next(orig_lines)
396
orig_line = orig_lines.next()
545
399
for hunk_line in hunk.lines:
546
seen_patch.append(hunk_line.contents)
400
seen_patch.append(str(hunk_line))
547
401
if isinstance(hunk_line, InsertLine):
548
402
yield hunk_line.contents
549
403
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
550
orig_line = next(orig_lines)
404
orig_line = orig_lines.next()
551
405
if orig_line != hunk_line.contents:
552
raise PatchConflict(line_no, orig_line,
553
b''.join(seen_patch))
406
raise PatchConflict(line_no, orig_line, "".join(seen_patch))
554
407
if isinstance(hunk_line, ContextLine):
557
if not isinstance(hunk_line, RemoveLine):
558
raise AssertionError(hunk_line)
410
assert isinstance(hunk_line, RemoveLine)
560
412
if orig_lines is not None:
561
413
for line in orig_lines:
565
def apply_patches(tt, patches, prefix=1):
566
"""Apply patches to a TreeTransform.
568
:param tt: TreeTransform instance
569
:param patches: List of patches
570
:param prefix: Number of leading path segments to strip
573
return '/'.join(p.split('/')[1:])
575
from breezy.bzr.generate_ids import gen_file_id
576
# TODO(jelmer): Extract and set mode
577
for patch in patches:
578
if patch.oldname == b'/dev/null':
582
oldname = strip_prefix(patch.oldname.decode())
583
trans_id = tt.trans_id_tree_path(oldname)
584
orig_contents = tt._tree.get_file_text(oldname)
585
tt.delete_contents(trans_id)
587
if patch.newname != b'/dev/null':
588
newname = strip_prefix(patch.newname.decode())
589
new_contents = iter_patched_from_hunks(
590
orig_contents.splitlines(True), patch.hunks)
592
parts = os.path.split(newname)
594
for part in parts[1:-1]:
595
trans_id = tt.new_directory(part, trans_id)
597
parts[-1], trans_id, new_contents,
598
file_id=gen_file_id(newname))
600
tt.create_file(new_contents, trans_id)
603
class AppliedPatches(object):
604
"""Context that provides access to a tree with patches applied.
607
def __init__(self, tree, patches, prefix=1):
609
self.patches = patches
613
self._tt = self.tree.preview_transform()
614
apply_patches(self._tt, self.patches, prefix=self.prefix)
615
return self._tt.get_preview_tree()
617
def __exit__(self, exc_type, exc_value, exc_tb):