/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3363.18.3 by Aaron Bentley
Add tests for iter_patched_from_hunks
1
# Copyright (C) 2004 - 2006, 2008 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley
Added patches.py
2
# <aaron.bentley@utoronto.ca>
3
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.5.93 by Aaron Bentley
Added patches.py
17
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
18
0.5.93 by Aaron Bentley
Added patches.py
19
class PatchSyntax(Exception):
    """Base class for errors raised when patch text cannot be parsed."""

    def __init__(self, msg):
        """Create the error.

        :param msg: Description of the syntax problem
        :type msg: str
        """
        Exception.__init__(self, msg)
22
23
24
class MalformedPatchHeader(PatchSyntax):
    """Raised when the ``---``/``+++`` header of a patch cannot be parsed."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the header
        :type desc: str
        :param line: The offending line (its repr is put in the message)
        """
        self.desc = desc
        self.line = line
        msg = "Malformed patch header.  %s\n%r" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
30
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
31
0.5.93 by Aaron Bentley
Added patches.py
32
class MalformedHunkHeader(PatchSyntax):
    """Raised when an ``@@ ... @@`` hunk header cannot be parsed."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the header
        :type desc: str
        :param line: The offending line (its repr is put in the message)
        """
        self.desc = desc
        self.line = line
        msg = "Malformed hunk header.  %s\n%r" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
38
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
39
0.5.93 by Aaron Bentley
Added patches.py
40
class MalformedLine(PatchSyntax):
    """Raised when a line inside a hunk body has an unrecognised prefix."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the line
        :type desc: str
        :param line: The offending line (included verbatim in the message)
        """
        self.desc = desc
        self.line = line
        msg = "Malformed line.  %s\n%s" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
46
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
47
1185.82.125 by Aaron Bentley
More cleanups
48
class PatchConflict(Exception):
    """Raised when the text being patched differs from what the patch expects."""

    def __init__(self, line_no, orig_line, patch_line):
        """Create the error.

        :param line_no: Line number in the original text where the mismatch
            was found
        :type line_no: int
        :param orig_line: The line actually present in the original text
        :type orig_line: str
        :param patch_line: What the patch expected; converted with str()
        """
        # Trailing newlines are stripped so the quoted text stays on one line.
        orig = orig_line.rstrip('\n')
        patch = str(patch_line).rstrip('\n')
        msg = ('Text contents mismatch at line %d.  Original has "%s",'
               ' but patch says it should be "%s"' % (line_no, orig, patch))
        Exception.__init__(self, msg)
55
56
0.5.93 by Aaron Bentley
Added patches.py
57
def get_patch_names(iter_lines):
    """Read the ``---``/``+++`` header lines and return the two file names.

    Exactly two lines are consumed from ``iter_lines`` on success.

    :param iter_lines: Iterator over the lines of a single-file patch
    :return: The original and modified names, as a tuple
    :rtype: (str, str)
    :raises MalformedPatchHeader: if either header line is missing, or the
        first line does not start with ``"--- "``
    :raises PatchSyntax: if the second line does not start with ``"+++ "``
    """
    # The next() builtin works on both Python 2.6+ and Python 3 iterators,
    # unlike the Python-2-only ``.next()`` method.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")

    return (orig_name, mod_name)
75
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
76
0.5.93 by Aaron Bentley
Added patches.py
77
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A range written without a comma (e.g. ``"7"``) has an implied length
    of 1.

    :param textrange: The text to parse
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raises ValueError: if a field is not an integer or there are more than
        two comma-separated fields
    """
    fields = textrange.split(',')
    if len(fields) == 1:
        pos = fields[0]
        length = "1"
    else:
        # Unpacking raises ValueError when there are more than two fields.
        (pos, length) = fields
    return (int(pos), int(length))
94
95
 
96
def hunk_from_header(line):
    """Build an empty Hunk from a ``@@ -a,b +c,d @@ [tail]`` header line.

    :param line: The header line, including its trailing newline
    :type line: str
    :return: A Hunk with positions, ranges and tail set, and no body lines
    :raises MalformedHunkHeader: if the line does not have the expected
        format, a range is not numeric, or a range is negative
    """
    # Imported here rather than at module level to keep importing this
    # module cheap.
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Optional text after the closing "@@" (as produced by "diff -p").
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley
Added patches.py
116
117
118
class HunkLine:
    """A single line of a hunk body, stored without its leading marker."""

    def __init__(self, contents):
        """Create the line.

        :param contents: The line text with the leading marker character
            already removed
        :type contents: str
        """
        self.contents = contents

    def get_str(self, leadchar):
        """Render the line as patch text.

        :param leadchar: The marker to prefix the contents with
        :type leadchar: str
        :return: ``leadchar`` plus the contents; when the contents do not end
            with a newline, a newline and the NO_NL marker line are appended
        :rtype: str
        """
        if self.contents.endswith('\n'):
            terminator = ''
        else:
            terminator = '\n' + NO_NL
        return leadchar + self.contents + terminator
130
131
132
class ContextLine(HunkLine):
    """A hunk line present in both the original and the modified text."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        """Return the line as patch text, prefixed with a space."""
        return self.get_str(" ")
138
139
140
class InsertLine(HunkLine):
    """A hunk line that the patch adds to the modified text."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        """Return the line as patch text, prefixed with ``+``."""
        return self.get_str("+")
146
147
148
class RemoveLine(HunkLine):
    """A hunk line that the patch removes from the original text."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        """Return the line as patch text, prefixed with ``-``."""
        return self.get_str("-")
154
155
# Marker line that follows a hunk line whose text has no terminating newline.
NO_NL = '\\ No newline at end of file\n'
156
# NOTE(review): presumably a PyChecker directive silencing its return-value
# warning for parse_line, which returns either a HunkLine or the NO_NL
# string -- confirm.
__pychecker__="no-returnvalues"
157
158
def parse_line(line):
    """Convert one line of a hunk body into its parsed form.

    :param line: A raw line from the body of a hunk
    :type line: str
    :return: A ContextLine, InsertLine or RemoveLine holding the text after
        the marker character, or the NO_NL string itself when the line is
        the "no newline" marker
    :raises MalformedLine: if the line has no recognised prefix
    """
    if line.startswith("\n"):
        # A completely empty line is accepted as an empty context line.
        return ContextLine(line)
    elif line.startswith(" "):
        return ContextLine(line[1:])
    elif line.startswith("+"):
        return InsertLine(line[1:])
    elif line.startswith("-"):
        return RemoveLine(line[1:])
    elif line == NO_NL:
        return NO_NL
    else:
        raise MalformedLine("Unknown line type", line)
171
# NOTE(review): presumably resets the PyChecker directive set before
# parse_line -- confirm.
__pychecker__=""
172
173
174
class Hunk:
    """One hunk of a unified diff: header fields plus parsed body lines."""

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        """Create a hunk with an empty body.

        :param orig_pos: Start position in the original text
        :type orig_pos: int
        :param orig_range: Number of original lines covered
        :type orig_range: int
        :param mod_pos: Start position in the modified text
        :type mod_pos: int
        :param mod_range: Number of modified lines covered
        :type mod_range: int
        :param tail: Optional text following the closing ``@@`` of the header
        """
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        # Parsed body lines, appended by the code that reads the hunk.
        self.lines = []

    def get_header(self):
        """Return the ``@@ -a,b +c,d @@ [tail]`` header line for this hunk.

        :rtype: str
        """
        if self.tail is None:
            tail_str = ''
        else:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing one-line ranges.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: a string in the format 1,4 except when range == 1, in which
            case only the position is given
        """
        if range == 1:
            return "%i" % pos
        else:
            return "%i,%i" % (pos, range)

    def __str__(self):
        """Return the header followed by every body line as patch text."""
        return "".join([self.get_header()] + [str(line) for line in self.lines])

    def shift_to_mod(self, pos):
        """Return how far this hunk shifts an original-text position.

        :param pos: A position in the original text
            (NOTE(review): appears to be 0-based, since it is compared with
            ``orig_pos - 1`` -- confirm against callers)
        :type pos: int
        :return: 0 for positions before the hunk, the hunk's net size change
            for positions after it, otherwise the result of
            shift_to_mod_lines (which may be None)
        """
        if pos < self.orig_pos-1:
            return 0
        elif pos > self.orig_pos+self.orig_range:
            return self.mod_range - self.orig_range
        else:
            return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the body lines to compute the shift for a position in the hunk.

        :param pos: A position in the original text
        :type pos: int
        :return: The accumulated shift (+1 per inserted line, -1 per removed
            line) up to ``pos``, or None if the line at ``pos`` is removed
        """
        position = self.orig_pos-1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    # The requested line does not exist in the modified text.
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
238
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
239
0.5.93 by Aaron Bentley
Added patches.py
240
def iter_hunks(iter_lines):
    """Parse hunks from the lines following a patch header.

    Lines consisting of only a newline between hunks are skipped.  Any other
    line outside a hunk body must be a hunk header.

    :param iter_lines: Iterator over patch lines, positioned after the
        ``---``/``+++`` header.  It must be an iterator (not merely an
        iterable) because body lines are pulled from it with next().
    :return: iterator of Hunk
    :raises MalformedHunkHeader: if a hunk header cannot be parsed
    :raises MalformedLine: if a body line has an unknown prefix
    """
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        hunk = hunk_from_header(line)
        orig_size = 0
        mod_size = 0
        # Read body lines until both sides of the hunk are fully accounted for.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Input ended in the middle of a hunk.  End the generator
                # quietly, as the original ``.next()`` call did on Python 2;
                # letting StopIteration escape would be a RuntimeError under
                # PEP 479.
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
262
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
263
0.5.93 by Aaron Bentley
Added patches.py
264
class Patch:
    """The parsed patch for a single file: two names and a list of hunks."""

    def __init__(self, oldname, newname):
        """Create a patch with no hunks.

        :param oldname: Name taken from the ``---`` header line
        :param newname: Name taken from the ``+++`` header line
        """
        self.oldname = oldname
        self.newname = newname
        self.hunks = []

    def __str__(self):
        """Return the header followed by every hunk as patch text."""
        return self.get_header() + "".join([str(h) for h in self.hunks])

    def get_header(self):
        """Return the ``---``/``+++`` header lines.

        :rtype: str
        """
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: number of inserted lines, removed lines and hunks
        :rtype: (int, int, int)
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        :param position: Position in the original text
        :type position: int
        :return: The corresponding position in the modified text, or None if
            a hunk reports (by returning None from shift_to_mod) that the
            line was removed
        """
        newpos = position
        for hunk in self.hunks:
            # Each hunk is asked about the original position, not the
            # running total.
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
318
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
319
0.5.93 by Aaron Bentley
Added patches.py
320
def parse_patch(iter_lines):
    """Parse the lines of a single-file patch into a Patch.

    :param iter_lines: Iterator over the patch lines, starting at the
        ``---`` header line
    :return: A Patch holding the names from the header and every hunk
    :raises PatchSyntax: (or a subclass) if the text cannot be parsed
    """
    (orig_name, mod_name) = get_patch_names(iter_lines)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines):
        patch.hunks.append(hunk)
    return patch
326
327
328
def iter_file_patch(iter_lines):
    """Split a multi-file patch into one list of lines per file.

    Lines starting with ``"=== "``, ``"*** "`` or ``"#"`` are dropped.  A
    line starting with ``"--- "`` begins a new file, unless it falls inside
    the body of a hunk.

    :param iter_lines: Iterable of patch lines
    :return: iterator of lists of lines, one list per file
    :raises MalformedHunkHeader: if a line starting with ``@@`` cannot be
        parsed as a hunk header
    """
    saved_lines = []
    # Number of original-side lines still expected in the current hunk body.
    # While it is positive, a line starting with "--- " is a removed line
    # whose text begins with "--", not the start of another file.
    orig_range = 0
    for line in iter_lines:
        if line.startswith(('=== ', '*** ')):
            continue
        if line.startswith('#'):
            continue
        elif orig_range > 0:
            if line.startswith(('-', ' ')):
                orig_range -= 1
        elif line.startswith('--- '):
            if saved_lines:
                yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            orig_range = hunk_from_header(line).orig_range
        saved_lines.append(line)
    if saved_lines:
        yield saved_lines
349
350
0.5.96 by Aaron Bentley
Cleaned up handling of files with no terminating \n
351
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating \\n are produced without one.  This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: Iterable of patch lines
    :return: iterator of lines with every NO_NL marker line removed and the
        line preceding each marker stripped of its final character
    :raises AssertionError: if a NO_NL marker has no preceding line, or the
        preceding line does not end with a newline
    """
    # Output runs one line behind the input so that a marker can still
    # modify the line before it.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
370
371
0.5.93 by Aaron Bentley
Added patches.py
372
def parse_patches(iter_lines):
    """Parse a multi-file patch into a list of Patch objects.

    :param iter_lines: Iterable of patch lines
    :return: One Patch per file found in the input
    :rtype: list of Patch
    :raises PatchSyntax: (or a subclass) if the text cannot be parsed
    """
    iter_lines = iter_lines_handle_nl(iter_lines)
    return [parse_patch(iter(f)) for f in iter_file_patch(iter_lines)]
375
376
377
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the common prefix length is compared: if one text is a prefix of
    the other, no difference is reported.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the shorter text, which bounds the comparison.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
394
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
395
0.5.93 by Aaron Bentley
Added patches.py
396
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: Iterable of the lines of a single-file patch,
        starting at the ``---`` header line.
    :return: iterator of the patched lines
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # The file names are not needed; this call just consumes the header.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
403
3363.18.4 by Aaron Bentley
Updates from review (and a doc update)
404
3363.18.1 by Aaron Bentley
Allow patching directly from parsed hunks
405
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.  May be None, in which case no
        trailing lines are copied after the last hunk.
    :param hunks: An iterable of Hunk instances.
    :return: iterator of the patched lines
    :raises PatchConflict: if a context or removed line in a hunk does not
        match the corresponding original line
    """
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy the untouched lines that precede this hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # The original text is shorter than the hunk expects.  End
                # quietly, as the original ``.next()`` call did on Python 2;
                # letting StopIteration escape would be a RuntimeError under
                # PEP 479.
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            # Everything seen so far is kept for the conflict message.
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line, "".join(seen_patch))
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                # A RemoveLine consumes the original line and yields nothing.
                line_no += 1
    if orig_lines is not None:
        for line in orig_lines:
            yield line