bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
1  | 
# (C) 2005 Matt Mackall
 | 
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
2  | 
# (C) 2005 Canonical Ltd
 | 
3  | 
||
4  | 
# based on code by Matt Mackall, hacked by Martin Pool
 | 
|
5  | 
||
6  | 
# mm's code works line-by-line; this just works on byte strings.
 | 
|
7  | 
# Possibly slower; possibly gives better results for code not
 | 
|
8  | 
# regularly separated by newlines and anyhow a bit simpler.
 | 
|
9  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
10  | 
|
11  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
12  | 
# it under the terms of the GNU General Public License as published by
 | 
|
13  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
14  | 
# (at your option) any later version.
 | 
|
15  | 
||
16  | 
# This program is distributed in the hope that it will be useful,
 | 
|
17  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
18  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
19  | 
# GNU General Public License for more details.
 | 
|
20  | 
||
21  | 
# You should have received a copy of the GNU General Public License
 | 
|
22  | 
# along with this program; if not, write to the Free Software
 | 
|
23  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
24  | 
||
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
25  | 
|
26  | 
# TODO: maybe work on files not strings?
 | 
|
27  | 
||
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
28  | 
# FIXME: doesn't work properly on files without trailing newlines
 | 
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
29  | 
|
| 
1185.1.41
by Robert Collins
 massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid  | 
30  | 
import difflib  | 
31  | 
import struct  | 
|
32  | 
import sys  | 
|
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
33  | 
import unittest  | 
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
34  | 
from cStringIO import StringIO  | 
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
35  | 
|
| 
1185.1.41
by Robert Collins
 massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid  | 
36  | 
|
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
37  | 
def linesplit(a):  | 
38  | 
"""Split into two lists: content and line positions.  | 
|
39  | 
||
40  | 
    This returns (al, ap).
 | 
|
41  | 
||
42  | 
    al[i] is the string content of line i of the file, including its
 | 
|
43  | 
    newline (if any).
 | 
|
44  | 
||
45  | 
    ap[i] is the byte position in the file where that line starts.
 | 
|
46  | 
||
47  | 
    ap[-1] is the byte position of the end of the file (i.e. the
 | 
|
48  | 
    length of the file.)
 | 
|
49  | 
||
50  | 
    This transformation allows us to do a line-based diff and then map
 | 
|
51  | 
    back to byte positions.
 | 
|
52  | 
    """
 | 
|
53  | 
||
54  | 
al, ap = [], []  | 
|
55  | 
last = 0  | 
|
56  | 
||
57  | 
n = a.find("\n") + 1  | 
|
58  | 
while n > 0:  | 
|
59  | 
ap.append(last)  | 
|
60  | 
al.append(a[last:n])  | 
|
61  | 
last = n  | 
|
62  | 
n = a.find("\n", n) + 1  | 
|
63  | 
||
64  | 
if last < len(a):  | 
|
65  | 
al.append(a[last:])  | 
|
66  | 
ap.append(last)  | 
|
67  | 
||
68  | 
    # position at the end
 | 
|
69  | 
ap.append(len(a))  | 
|
70  | 
||
71  | 
return (al, ap)  | 
|
72  | 
||
73  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
74  | 
def diff(a, b):  | 
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
75  | 
    # TODO: Use different splits, perhaps rsync-like, for binary files?
 | 
76  | 
||
77  | 
(al, ap) = linesplit(a)  | 
|
78  | 
(bl, bp) = linesplit(b)  | 
|
79  | 
||
80  | 
d = difflib.SequenceMatcher(None, al, bl)  | 
|
| 
225
by mbp at sourcefrog
 debug output  | 
81  | 
|
82  | 
    ## sys.stderr.write('  ~ real_quick_ratio: %.4f\n' % d.real_quick_ratio())
 | 
|
83  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
84  | 
for o, m, n, s, t in d.get_opcodes():  | 
85  | 
if o == 'equal': continue  | 
|
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
86  | 
        # a[m:n] should be replaced by b[s:t]
 | 
87  | 
if s == t:  | 
|
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
88  | 
yield ap[m], ap[n], ''  | 
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
89  | 
else:  | 
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
90  | 
yield ap[m], ap[n], ''.join(bl[s:t])  | 
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
91  | 
|
92  | 
||
93  | 
def tobinary(ops):  | 
|
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
94  | 
b = StringIO()  | 
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
95  | 
for f in ops:  | 
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
96  | 
b.write(struct.pack(">III", f[0], f[1], len(f[2])))  | 
97  | 
b.write(f[2])  | 
|
98  | 
return b.getvalue()  | 
|
99  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
100  | 
|
101  | 
def bdiff(a, b):  | 
|
102  | 
return tobinary(diff(a, b))  | 
|
103  | 
||
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
104  | 
|
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
105  | 
def patch(t, ops):  | 
106  | 
last = 0  | 
|
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
107  | 
b = StringIO()  | 
108  | 
||
109  | 
for m, n, r in ops:  | 
|
110  | 
b.write(t[last:m])  | 
|
111  | 
if r:  | 
|
112  | 
b.write(r)  | 
|
113  | 
last = n  | 
|
114  | 
||
115  | 
b.write(t[last:])  | 
|
116  | 
return b.getvalue()  | 
|
117  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
118  | 
|
119  | 
def frombinary(b):  | 
|
| 
205
by mbp at sourcefrog
 Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines  | 
120  | 
bin = StringIO(b)  | 
121  | 
while True:  | 
|
122  | 
p = bin.read(12)  | 
|
123  | 
if not p:  | 
|
124  | 
            break
 | 
|
125  | 
||
126  | 
m, n, l = struct.unpack(">III", p)  | 
|
127  | 
||
128  | 
if l == 0:  | 
|
129  | 
r = ''  | 
|
130  | 
else:  | 
|
131  | 
r = bin.read(l)  | 
|
132  | 
if len(r) != l:  | 
|
133  | 
raise Exception("truncated patch data")  | 
|
134  | 
||
135  | 
yield m, n, r  | 
|
136  | 
||
| 
198
by mbp at sourcefrog
 - experimental compressed Revfile support  | 
137  | 
|
138  | 
def bpatch(t, b):  | 
|
139  | 
return patch(t, frombinary(b))  | 
|
140  | 
||
141  | 
||
142  | 
||
143  | 
||
| 
974.1.26
by aaron.bentley at utoronto
 merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472  | 
144  | 
class TestDiffPatch(unittest.TestCase):  | 
145  | 
def doDiffPatch(self, old, new):  | 
|
146  | 
diff = bdiff(old, new)  | 
|
147  | 
result = bpatch(old, diff)  | 
|
148  | 
self.assertEquals(new, result)  | 
|
149  | 
||
150  | 
||
151  | 
def testSimpleDiff(self):  | 
|
152  | 
"""Simply add a line at the end"""  | 
|
153  | 
self.doDiffPatch('a\nb\n', 'a\nb\nc\n')  | 
|
154  | 
||
155  | 
||
156  | 
||
157  | 
def testTrailingLine(self):  | 
|
158  | 
"""Test diff that adds an unterminated line.  | 
|
159  | 
||
160  | 
        (Old versions didn't do this properly.)"""
 | 
|
161  | 
self.doDiffPatch('a\nb\nc\n',  | 
|
162  | 
'a\nb\nc\nd')  | 
|
163  | 
||
164  | 
||
165  | 
if __name__ == '__main__':  | 
|
166  | 
unittest.main()  |