bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
1 |
from difflib import SequenceMatcher |
2 |
||
|
0.9.3
by Aaron Bentley
Get three-parent comparisions under test |
3 |
|
|
0.9.1
by Aaron Bentley
Get trivial case passing |
4 |
class MultiParent(object): |
5 |
||
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
6 |
def __init__(self, hunks=None): |
7 |
if hunks is not None: |
|
8 |
self.hunks = hunks |
|
9 |
else: |
|
10 |
self.hunks = [] |
|
11 |
||
12 |
def __repr__(self): |
|
13 |
return "MultiParent(%r)" % self.hunks |
|
14 |
||
15 |
def __eq__(self, other): |
|
16 |
if self.__class__ is not other.__class__: |
|
17 |
return False |
|
18 |
return (self.hunks == other.hunks) |
|
|
0.9.1
by Aaron Bentley
Get trivial case passing |
19 |
|
20 |
@staticmethod
|
|
21 |
def from_lines(text, parents=()): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
22 |
"""Produce a MultiParent from a list of lines and parents""" |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
23 |
def compare(parent): |
24 |
return SequenceMatcher(None, parent, text).get_matching_blocks() |
|
25 |
parent_comparisons = [compare(p) for p in parents] |
|
26 |
cur_line = 0 |
|
27 |
new_text = NewText([]) |
|
28 |
parent_text = [] |
|
29 |
block_iter = [iter(i) for i in parent_comparisons] |
|
30 |
diff = MultiParent([]) |
|
31 |
def next_block(p): |
|
32 |
try: |
|
33 |
return block_iter[p].next() |
|
34 |
except StopIteration: |
|
35 |
return None |
|
36 |
cur_block = [next_block(p) for p, i in enumerate(block_iter)] |
|
37 |
while cur_line < len(text): |
|
38 |
best_match = None |
|
39 |
for p, block in enumerate(cur_block): |
|
40 |
if block is None: |
|
41 |
continue
|
|
42 |
i, j, n = block |
|
43 |
while j + n < cur_line: |
|
44 |
block = cur_block[p] = next_block(p) |
|
45 |
if block is None: |
|
46 |
break
|
|
47 |
i, j, n = block |
|
48 |
if block is None: |
|
49 |
continue
|
|
50 |
if j > cur_line: |
|
51 |
continue
|
|
52 |
offset = cur_line - j |
|
53 |
i += offset |
|
54 |
j = cur_line |
|
55 |
n -= offset |
|
56 |
if n == 0: |
|
57 |
continue
|
|
58 |
if best_match is None or n > best_match.num_lines: |
|
59 |
best_match = ParentText(p, i, j, n) |
|
60 |
if best_match is None: |
|
61 |
new_text.lines.append(text[cur_line]) |
|
62 |
cur_line += 1 |
|
63 |
else: |
|
64 |
if len(new_text.lines) > 0: |
|
65 |
diff.hunks.append(new_text) |
|
66 |
new_text = NewText([]) |
|
67 |
diff.hunks.append(best_match) |
|
68 |
cur_line += best_match.num_lines |
|
69 |
if len(new_text.lines) > 0: |
|
70 |
diff.hunks.append(new_text) |
|
|
0.9.1
by Aaron Bentley
Get trivial case passing |
71 |
return diff |
72 |
||
73 |
@classmethod
|
|
74 |
def from_texts(cls, text, parents=()): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
75 |
"""Produce a MultiParent from a text and list of parent text""" |
|
0.9.1
by Aaron Bentley
Get trivial case passing |
76 |
return cls.from_lines(text.splitlines(True), |
77 |
[p.splitlines(True) for p in parents]) |
|
78 |
||
|
0.9.4
by Aaron Bentley
Start supporting serialization |
79 |
def to_patch(self): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
80 |
"""Yield text lines for a patch""" |
|
0.9.4
by Aaron Bentley
Start supporting serialization |
81 |
for hunk in self.hunks: |
82 |
for line in hunk.to_patch(): |
|
83 |
yield line |
|
84 |
||
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
85 |
def range_iterator(self): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
86 |
"""Iterate through the hunks, with range indicated |
87 |
||
88 |
kind is "new" or "parent".
|
|
89 |
for "new", data is a list of lines.
|
|
90 |
for "parent", data is (parent, parent_start, parent_end)
|
|
91 |
:return: a generator of (start, end, kind, data)
|
|
92 |
"""
|
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
93 |
start = 0 |
94 |
for hunk in self.hunks: |
|
95 |
if isinstance(hunk, NewText): |
|
96 |
kind = 'new' |
|
97 |
end = start + len(hunk.lines) |
|
98 |
data = hunk.lines |
|
99 |
else: |
|
100 |
kind = 'parent' |
|
101 |
start = hunk.child_pos |
|
102 |
end = start + hunk.num_lines |
|
103 |
data = (hunk.parent, hunk.parent_pos, hunk.parent_pos + |
|
104 |
hunk.num_lines) |
|
105 |
yield start, end, kind, data |
|
106 |
start = end |
|
107 |
||
|
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
108 |
def num_lines(self): |
109 |
extra_n = 0 |
|
110 |
for hunk in reversed(self.hunks): |
|
111 |
if isinstance(hunk, ParentText): |
|
112 |
return hunk.child_pos + hunk.num_lines + extra_n |
|
113 |
extra_n += len(hunk.lines) |
|
114 |
return extra_n |
|
115 |
||
|
0.9.1
by Aaron Bentley
Get trivial case passing |
116 |
|
117 |
class NewText(object): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
118 |
"""The contents of text that is introduced by this text""" |
|
0.9.1
by Aaron Bentley
Get trivial case passing |
119 |
|
120 |
def __init__(self, lines): |
|
121 |
self.lines = lines |
|
122 |
||
123 |
def __eq__(self, other): |
|
124 |
if self.__class__ is not other.__class__: |
|
125 |
return False |
|
126 |
return (other.lines == self.lines) |
|
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
127 |
|
128 |
def __repr__(self): |
|
129 |
return 'NewText(%r)' % self.lines |
|
130 |
||
|
0.9.4
by Aaron Bentley
Start supporting serialization |
131 |
def to_patch(self): |
132 |
yield 'i %d\n' % len(self.lines) |
|
133 |
for line in self.lines: |
|
134 |
yield line |
|
135 |
yield '\n' |
|
136 |
||
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
137 |
|
138 |
class ParentText(object): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
139 |
"""A reference to text present in a parent text""" |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
140 |
|
141 |
def __init__(self, parent, parent_pos, child_pos, num_lines): |
|
142 |
self.parent = parent |
|
143 |
self.parent_pos = parent_pos |
|
144 |
self.child_pos = child_pos |
|
145 |
self.num_lines = num_lines |
|
146 |
||
147 |
def __repr__(self): |
|
148 |
return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\ |
|
149 |
' %(num_lines)r)' % self.__dict__ |
|
150 |
||
151 |
def __eq__(self, other): |
|
152 |
if self.__class__ != other.__class__: |
|
153 |
return False |
|
154 |
return (self.__dict__ == other.__dict__) |
|
|
0.9.4
by Aaron Bentley
Start supporting serialization |
155 |
|
156 |
def to_patch(self): |
|
157 |
yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\ |
|
158 |
% self.__dict__ |
|
|
0.9.8
by Aaron Bentley
get add_version working |
159 |
|
160 |
||
161 |
class MultiVersionedFile(object): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
162 |
"""VersionedFile skeleton for MultiParent""" |
|
0.9.8
by Aaron Bentley
get add_version working |
163 |
|
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
164 |
def __init__(self, snapshot_interval=25): |
|
0.9.8
by Aaron Bentley
get add_version working |
165 |
self._diffs = {} |
166 |
self._lines = {} |
|
167 |
self._parents = {} |
|
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
168 |
self._snapshots = {} |
169 |
self.snapshot_interval = snapshot_interval |
|
170 |
||
171 |
def do_snapshot(self, version_id, parent_ids): |
|
172 |
if len(parent_ids) == 0: |
|
173 |
return False |
|
174 |
counter = 0 |
|
175 |
while counter < self.snapshot_interval: |
|
176 |
if len(parent_ids) == 0: |
|
177 |
return False |
|
178 |
version_id = parent_ids[0] |
|
179 |
if self._snapshots[version_id]: |
|
180 |
return False |
|
181 |
counter += 1 |
|
182 |
parent_ids = self._parents[version_id] |
|
183 |
return True |
|
|
0.9.8
by Aaron Bentley
get add_version working |
184 |
|
185 |
def add_version(self, lines, version_id, parent_ids): |
|
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
186 |
if self.do_snapshot(version_id, parent_ids): |
187 |
self._snapshots[version_id] = True |
|
188 |
diff = MultiParent([NewText(lines)]) |
|
189 |
else: |
|
190 |
self._snapshots[version_id] = False |
|
191 |
parent_lines = [self._lines[p] for p in parent_ids] |
|
192 |
diff = MultiParent.from_lines(lines, parent_lines) |
|
|
0.9.8
by Aaron Bentley
get add_version working |
193 |
self.add_diff(diff, version_id, parent_ids) |
194 |
self._lines[version_id] = lines |
|
195 |
||
196 |
def add_diff(self, diff, version_id, parent_ids): |
|
197 |
self._diffs[version_id] = diff |
|
198 |
self._parents[version_id] = parent_ids |
|
199 |
||
200 |
def clear_cache(self): |
|
201 |
self._lines.clear() |
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
202 |
|
203 |
def get_line_list(self, version_ids): |
|
204 |
return [self.cache_version(v) for v in version_ids] |
|
205 |
||
206 |
def cache_version(self, version_id): |
|
207 |
try: |
|
208 |
return self._lines[version_id] |
|
209 |
except KeyError: |
|
210 |
pass
|
|
211 |
diff = self._diffs[version_id] |
|
212 |
lines = [] |
|
213 |
reconstructor = _Reconstructor(self._diffs, self._lines, self._parents) |
|
|
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
214 |
reconstructor.reconstruct_version(lines, version_id) |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
215 |
self._lines[version_id] = lines |
216 |
return lines |
|
217 |
||
218 |
||
219 |
class _Reconstructor(object): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
220 |
"""Build a text from the diffs, ancestry graph and cached lines""" |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
221 |
|
222 |
def __init__(self, diffs, lines, parents): |
|
223 |
self.diffs = diffs |
|
224 |
self.lines = lines |
|
225 |
self.parents = parents |
|
226 |
self.cursor = {} |
|
227 |
||
228 |
def reconstruct(self, lines, parent_text, version_id): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
229 |
"""Append the lines referred to by a ParentText to lines""" |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
230 |
parent_id = self.parents[version_id][parent_text.parent] |
231 |
end = parent_text.parent_pos + parent_text.num_lines |
|
232 |
return self._reconstruct(lines, parent_id, parent_text.parent_pos, end) |
|
233 |
||
234 |
def _reconstruct(self, lines, req_version_id, req_start, req_end): |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
235 |
"""Append lines for the requested version_id range""" |
236 |
# stack of pending range requests
|
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
237 |
pending_reqs = [(req_version_id, req_start, req_end)] |
238 |
while len(pending_reqs) > 0: |
|
239 |
req_version_id, req_start, req_end = pending_reqs.pop() |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
240 |
# lazily allocate cursors for versions
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
241 |
try: |
242 |
start, end, kind, data, iterator = self.cursor[req_version_id] |
|
243 |
except KeyError: |
|
244 |
iterator = self.diffs[req_version_id].range_iterator() |
|
245 |
start, end, kind, data = iterator.next() |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
246 |
# find the first hunk relevant to the request
|
247 |
while end <= req_start: |
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
248 |
start, end, kind, data = iterator.next() |
249 |
self.cursor[req_version_id] = start, end, kind, data, iterator |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
250 |
# if the hunk can't satisfy the whole request, split it in two,
|
251 |
# and leave the second half for later.
|
|
252 |
if req_end > end: |
|
253 |
pending_reqs.append((req_version_id, end, req_end)) |
|
254 |
req_end = end |
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
255 |
if kind == 'new': |
256 |
lines.extend(data[req_start - start: (req_end - start)]) |
|
257 |
else: |
|
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
258 |
# If the hunk is a ParentText, rewrite it as a range request
|
259 |
# for the parent, and make it the next pending request.
|
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
260 |
parent, parent_start, parent_end = data |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
261 |
new_version_id = self.parents[req_version_id][parent] |
262 |
new_start = parent_start + req_start - start |
|
263 |
new_end = parent_end + req_end - end |
|
264 |
pending_reqs.append((new_version_id, new_start, new_end)) |
|
|
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
265 |
|
266 |
def reconstruct_version(self, lines, version_id): |
|
267 |
length = self.diffs[version_id].num_lines() |
|
268 |
return self._reconstruct(lines, version_id, 0, length) |