bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Functionality for doing annotations in the 'optimal' way"""
|
|
18 |
||
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
19 |
cdef extern from "python-compat.h": |
20 |
pass
|
|
21 |
||
22 |
cdef extern from "Python.h": |
|
23 |
ctypedef int Py_ssize_t |
|
24 |
ctypedef struct PyObject: |
|
25 |
pass
|
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
26 |
ctypedef struct PyListObject: |
27 |
PyObject **ob_item |
|
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
28 |
int PyList_CheckExact(object) |
29 |
PyObject *PyList_GET_ITEM(object, Py_ssize_t o) |
|
30 |
Py_ssize_t PyList_GET_SIZE(object) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
31 |
int PyList_Append(object, object) except -1 |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
32 |
int PyList_SetItem(object, Py_ssize_t o, object) except -1 |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
33 |
int PyList_Sort(object) except -1 |
34 |
||
35 |
int PyTuple_CheckExact(object) |
|
36 |
object PyTuple_New(Py_ssize_t len) |
|
37 |
void PyTuple_SET_ITEM(object, Py_ssize_t pos, object) |
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
38 |
void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t, |
39 |
PyObject *) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
40 |
int PyTuple_Resize(PyObject **, Py_ssize_t newlen) |
41 |
PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o) |
|
42 |
Py_ssize_t PyTuple_GET_SIZE(object) |
|
43 |
||
44 |
PyObject *PyDict_GetItem(object d, object k) |
|
45 |
int PyDict_SetItem(object d, object k, object v) except -1 |
|
46 |
||
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
47 |
void Py_INCREF(object) |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
48 |
void Py_INCREF_ptr "Py_INCREF" (PyObject *) |
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
49 |
void Py_DECREF_ptr "Py_DECREF" (PyObject *) |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
50 |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
51 |
int Py_EQ |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
52 |
int Py_LT |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
53 |
int PyObject_RichCompareBool(object, object, int opid) except -1 |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
54 |
int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" ( |
55 |
PyObject *, PyObject *, int opid) |
|
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
56 |
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
57 |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
58 |
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui |
59 |
||
60 |
||
61 |
cdef class _NeededTextIterator: |
|
62 |
||
63 |
cdef object counter |
|
64 |
cdef object text_cache |
|
65 |
cdef object stream |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
66 |
cdef object ann_keys |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
67 |
cdef object stream_len |
68 |
cdef object pb |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
69 |
cdef int stream_is_consumed |
70 |
cdef int ann_key_pos |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
71 |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
72 |
def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None): |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
73 |
self.counter = 0 |
74 |
self.stream = stream |
|
75 |
self.stream_len = stream_len |
|
76 |
self.text_cache = text_cache |
|
77 |
self.stream_len = stream_len |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
78 |
self.ann_keys = list(ann_keys) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
79 |
self.pb = pb |
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
80 |
self.stream_is_consumed = 0 |
81 |
self.ann_key_pos = 0 |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
82 |
|
83 |
def __iter__(self): |
|
84 |
return self |
|
85 |
||
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
86 |
cdef _get_ann_text(self): |
87 |
if self.ann_key_pos >= len(self.ann_keys): |
|
88 |
raise StopIteration |
|
89 |
key = self.ann_keys[self.ann_key_pos] |
|
90 |
self.ann_key_pos = self.ann_key_pos + 1 |
|
91 |
lines = self.text_cache[key] |
|
92 |
num_lines = len(lines) |
|
93 |
return key, lines, num_lines |
|
94 |
||
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
95 |
def __next__(self): |
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
96 |
if self.stream_is_consumed: |
97 |
return self._get_ann_text() |
|
98 |
try: |
|
99 |
record = self.stream.next() |
|
100 |
except StopIteration: |
|
101 |
self.stream_is_consumed = 1 |
|
102 |
return self._get_ann_text() |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
103 |
if self.pb is not None: |
104 |
self.pb.update('extracting', self.counter, self.stream_len) |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
105 |
if record.storage_kind == 'absent': |
106 |
raise errors.RevisionNotPresent(record.key, None) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
107 |
self.counter = self.counter + 1 |
108 |
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) |
|
109 |
num_lines = len(lines) |
|
110 |
self.text_cache[record.key] = lines |
|
111 |
return record.key, lines, num_lines |
|
112 |
||
113 |
||
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
114 |
cdef int _check_annotations_are_lists(annotations, |
115 |
parent_annotations) except -1: |
|
116 |
if not PyList_CheckExact(annotations): |
|
117 |
raise TypeError('annotations must be a list') |
|
118 |
if not PyList_CheckExact(parent_annotations): |
|
119 |
raise TypeError('parent_annotations must be a list') |
|
120 |
return 0 |
|
121 |
||
122 |
||
123 |
cdef int _check_match_ranges(parent_annotations, annotations, |
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
124 |
Py_ssize_t parent_idx, Py_ssize_t lines_idx, |
125 |
Py_ssize_t match_len) except -1: |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
126 |
if parent_idx + match_len > PyList_GET_SIZE(parent_annotations): |
127 |
raise ValueError('Match length exceeds len of' |
|
128 |
' parent_annotations %s > %s' |
|
129 |
% (parent_idx + match_len, |
|
130 |
PyList_GET_SIZE(parent_annotations))) |
|
131 |
if lines_idx + match_len > PyList_GET_SIZE(annotations): |
|
132 |
raise ValueError('Match length exceeds len of' |
|
133 |
' annotations %s > %s' |
|
134 |
% (lines_idx + match_len, |
|
135 |
PyList_GET_SIZE(annotations))) |
|
136 |
return 0 |
|
137 |
||
138 |
||
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
139 |
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): |
140 |
pos[0] = pos[0] + 1 |
|
141 |
if pos[0] >= PyTuple_GET_SIZE(tpl): |
|
142 |
return NULL |
|
143 |
return PyTuple_GET_ITEM(tpl, pos[0]) |
|
144 |
||
145 |
||
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
146 |
cdef object _combine_annotations(ann_one, ann_two, cache): |
147 |
"""Combine the annotations from both sides.""" |
|
148 |
cdef Py_ssize_t pos_one, pos_two, len_one, len_two |
|
149 |
cdef Py_ssize_t out_pos |
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
150 |
cdef PyObject *temp, *left, *right |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
151 |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
152 |
if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)): |
153 |
cache_key = (ann_one, ann_two) |
|
154 |
else: |
|
155 |
cache_key = (ann_two, ann_one) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
156 |
temp = PyDict_GetItem(cache, cache_key) |
157 |
if temp != NULL: |
|
158 |
return <object>temp |
|
159 |
||
160 |
if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two): |
|
161 |
raise TypeError('annotations must be tuples') |
|
162 |
# We know that annotations are tuples, and that both sides are already
|
|
163 |
# sorted, so we can just walk and update a new list.
|
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
164 |
pos_one = -1 |
165 |
pos_two = -1 |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
166 |
out_pos = 0 |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
167 |
left = _next_tuple_entry(ann_one, &pos_one) |
168 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
169 |
new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one) |
|
170 |
+ PyTuple_GET_SIZE(ann_two)) |
|
171 |
while left != NULL and right != NULL: |
|
172 |
if (PyObject_RichCompareBool_ptr(left, right, Py_EQ)): |
|
173 |
# Identical values, step both
|
|
174 |
Py_INCREF_ptr(left) |
|
175 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
176 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
177 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
178 |
elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)): |
|
179 |
# left < right or right == NULL
|
|
180 |
Py_INCREF_ptr(left) |
|
181 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
182 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
183 |
else: # right < left or left == NULL |
|
184 |
Py_INCREF_ptr(right) |
|
185 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
186 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
187 |
out_pos = out_pos + 1 |
|
188 |
while left != NULL: |
|
189 |
Py_INCREF_ptr(left) |
|
190 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
191 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
192 |
out_pos = out_pos + 1 |
|
193 |
while right != NULL: |
|
194 |
Py_INCREF_ptr(right) |
|
195 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
196 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
197 |
out_pos = out_pos + 1 |
|
198 |
if out_pos != PyTuple_GET_SIZE(new_ann): |
|
199 |
# Timing _PyTuple_Resize was not significantly faster that slicing
|
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
200 |
# PyTuple_Resize((<PyObject **>new_ann), out_pos)
|
201 |
new_ann = new_ann[0:out_pos] |
|
202 |
PyDict_SetItem(cache, cache_key, new_ann) |
|
203 |
return new_ann |
|
204 |
||
205 |
||
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
206 |
cdef _apply_parent_annotations(annotations, parent_annotations, |
207 |
matching_blocks): |
|
208 |
"""Apply the annotations from parent_annotations into annotations. |
|
209 |
||
210 |
matching_blocks defines the ranges that match.
|
|
211 |
"""
|
|
212 |
cdef Py_ssize_t parent_idx, lines_idx, match_len, idx |
|
213 |
cdef PyListObject *par_list, *ann_list |
|
214 |
cdef PyObject **par_temp, **ann_temp |
|
215 |
||
216 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
217 |
par_list = <PyListObject *>parent_annotations |
|
218 |
ann_list = <PyListObject *>annotations |
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
219 |
# For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
|
220 |
# across from the parent entry. So this routine is heavily optimized for
|
|
221 |
# that. Would be interesting if we could use memcpy() but we have to incref
|
|
222 |
# and decref
|
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
223 |
for parent_idx, lines_idx, match_len in matching_blocks: |
224 |
_check_match_ranges(parent_annotations, annotations, |
|
225 |
parent_idx, lines_idx, match_len) |
|
226 |
par_temp = par_list.ob_item + parent_idx |
|
227 |
ann_temp = ann_list.ob_item + lines_idx |
|
228 |
for idx from 0 <= idx < match_len: |
|
229 |
Py_INCREF_ptr(par_temp[idx]) |
|
230 |
Py_DECREF_ptr(ann_temp[idx]) |
|
231 |
ann_temp[idx] = par_temp[idx] |
|
232 |
||
233 |
||
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
234 |
class Annotator: |
235 |
"""Class that drives performing annotations.""" |
|
236 |
||
237 |
def __init__(self, vf): |
|
238 |
"""Create a new Annotator from a VersionedFile.""" |
|
239 |
self._vf = vf |
|
240 |
self._parent_map = {} |
|
241 |
self._text_cache = {} |
|
242 |
# Map from key => number of nexts that will be built from this key
|
|
243 |
self._num_needed_children = {} |
|
244 |
self._annotations_cache = {} |
|
245 |
self._heads_provider = None |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
246 |
self._ann_tuple_cache = {} |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
247 |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
248 |
|
249 |
def _update_needed_children(self, key, parent_keys): |
|
250 |
for parent_key in parent_keys: |
|
251 |
if parent_key in self._num_needed_children: |
|
252 |
self._num_needed_children[parent_key] += 1 |
|
253 |
else: |
|
254 |
self._num_needed_children[parent_key] = 1 |
|
255 |
||
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
256 |
def _get_needed_keys(self, key): |
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
257 |
"""Determine the texts we need to get from the backing vf. |
258 |
||
259 |
:return: (vf_keys_needed, ann_keys_needed)
|
|
260 |
vf_keys_needed These are keys that we need to get from the vf
|
|
261 |
ann_keys_needed Texts which we have in self._text_cache but we
|
|
262 |
don't have annotations for. We need to yield these
|
|
263 |
in the proper order so that we can get proper
|
|
264 |
annotations.
|
|
265 |
"""
|
|
266 |
parent_map = self._parent_map |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
267 |
# We need 1 extra copy of the node we will be looking at when we are
|
268 |
# done
|
|
269 |
self._num_needed_children[key] = 1 |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
270 |
vf_keys_needed = set() |
271 |
ann_keys_needed = set() |
|
272 |
needed_keys = set([key]) |
|
273 |
while needed_keys: |
|
274 |
parent_lookup = [] |
|
275 |
next_parent_map = {} |
|
276 |
for key in needed_keys: |
|
277 |
if key in self._parent_map: |
|
278 |
# We don't need to lookup this key in the vf
|
|
279 |
if key not in self._text_cache: |
|
280 |
# Extract this text from the vf
|
|
281 |
vf_keys_needed.add(key) |
|
282 |
elif key not in self._annotations_cache: |
|
283 |
# We do need to annotate
|
|
284 |
ann_keys_needed.add(key) |
|
285 |
next_parent_map[key] = self._parent_map[key] |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
286 |
else: |
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
287 |
parent_lookup.append(key) |
288 |
vf_keys_needed.add(key) |
|
289 |
needed_keys = set() |
|
290 |
next_parent_map.update(self._vf.get_parent_map(parent_lookup)) |
|
291 |
for key, parent_keys in next_parent_map.iteritems(): |
|
292 |
self._update_needed_children(key, parent_keys) |
|
293 |
for key in parent_keys: |
|
294 |
if key not in parent_map: |
|
295 |
needed_keys.add(key) |
|
296 |
parent_map.update(next_parent_map) |
|
297 |
# _heads_provider does some graph caching, so it is only valid while
|
|
298 |
# self._parent_map hasn't changed
|
|
299 |
self._heads_provider = None |
|
300 |
return vf_keys_needed, ann_keys_needed |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
301 |
|
302 |
def _get_needed_texts(self, key, pb=None): |
|
303 |
"""Get the texts we need to properly annotate key. |
|
304 |
||
305 |
:param key: A Key that is present in self._vf
|
|
306 |
:return: Yield (this_key, text, num_lines)
|
|
307 |
'text' is an opaque object that just has to work with whatever
|
|
308 |
matcher object we are using. Currently it is always 'lines' but
|
|
309 |
future improvements may change this to a simple text string.
|
|
310 |
"""
|
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
311 |
keys, ann_keys = self._get_needed_keys(key) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
312 |
if pb is not None: |
313 |
pb.update('getting stream', 0, len(keys)) |
|
314 |
stream = self._vf.get_record_stream(keys, 'topological', True) |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
315 |
iterator = _NeededTextIterator(stream, self._text_cache, len(keys), |
316 |
ann_keys, pb) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
317 |
return iterator |
318 |
||
319 |
def _get_parent_annotations_and_matches(self, key, text, parent_key): |
|
320 |
"""Get the list of annotations for the parent, and the matching lines. |
|
321 |
||
322 |
:param text: The opaque value given by _get_needed_texts
|
|
323 |
:param parent_key: The key for the parent text
|
|
324 |
:return: (parent_annotations, matching_blocks)
|
|
325 |
parent_annotations is a list as long as the number of lines in
|
|
326 |
parent
|
|
327 |
matching_blocks is a list of (parent_idx, text_idx, len) tuples
|
|
328 |
indicating which lines match between the two texts
|
|
329 |
"""
|
|
330 |
parent_lines = self._text_cache[parent_key] |
|
331 |
parent_annotations = self._annotations_cache[parent_key] |
|
332 |
# PatienceSequenceMatcher should probably be part of Policy
|
|
333 |
matcher = patiencediff.PatienceSequenceMatcher(None, |
|
334 |
parent_lines, text) |
|
335 |
matching_blocks = matcher.get_matching_blocks() |
|
336 |
return parent_annotations, matching_blocks |
|
337 |
||
338 |
def _update_from_one_parent(self, key, annotations, lines, parent_key): |
|
339 |
"""Reannotate this text relative to its first parent.""" |
|
340 |
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches( |
|
341 |
key, lines, parent_key) |
|
342 |
||
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
343 |
_apply_parent_annotations(annotations, parent_annotations, |
344 |
matching_blocks) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
345 |
|
346 |
def _update_from_other_parents(self, key, annotations, lines, |
|
347 |
this_annotation, parent_key): |
|
348 |
"""Reannotate this text relative to a second (or more) parent.""" |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
349 |
cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx |
350 |
cdef Py_ssize_t pos |
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
351 |
cdef PyObject *ann_temp, *par_temp |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
352 |
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches( |
353 |
key, lines, parent_key) |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
354 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
355 |
last_ann = None |
356 |
last_parent = None |
|
357 |
last_res = None |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
358 |
cache = self._ann_tuple_cache |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
359 |
for parent_idx, lines_idx, match_len in matching_blocks: |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
360 |
_check_match_ranges(parent_annotations, annotations, |
361 |
parent_idx, lines_idx, match_len) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
362 |
# For lines which match this parent, we will now resolve whether
|
363 |
# this parent wins over the current annotation
|
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
364 |
for idx from 0 <= idx < match_len: |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
365 |
ann_idx = lines_idx + idx |
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
366 |
ann_temp = PyList_GET_ITEM(annotations, ann_idx) |
367 |
par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx) |
|
368 |
if (ann_temp == par_temp): |
|
369 |
# This is parent, do nothing
|
|
370 |
# Pointer comparison is fine here. Value comparison would
|
|
371 |
# be ok, but it will be handled in the final if clause by
|
|
372 |
# merging the two tuples into the same tuple
|
|
373 |
# Avoiding the Py_INCREF by using pointer comparison drops
|
|
374 |
# timing from 215ms => 125ms
|
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
375 |
continue
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
376 |
par_ann = <object>par_temp |
377 |
ann = <object>ann_temp |
|
378 |
if (ann is this_annotation): |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
379 |
# Originally claimed 'this', but it was really in this
|
380 |
# parent
|
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
381 |
Py_INCREF(par_ann) |
382 |
PyList_SetItem(annotations, ann_idx, par_ann) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
383 |
continue
|
384 |
# Resolve the fact that both sides have a different value for
|
|
385 |
# last modified
|
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
386 |
if (ann is last_ann and par_ann is last_parent): |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
387 |
Py_INCREF(last_res) |
388 |
PyList_SetItem(annotations, ann_idx, last_res) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
389 |
else: |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
390 |
new_ann = _combine_annotations(ann, par_ann, cache) |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
391 |
Py_INCREF(new_ann) |
392 |
PyList_SetItem(annotations, ann_idx, new_ann) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
393 |
last_ann = ann |
394 |
last_parent = par_ann |
|
395 |
last_res = new_ann |
|
396 |
||
397 |
def _record_annotation(self, key, parent_keys, annotations): |
|
398 |
self._annotations_cache[key] = annotations |
|
399 |
for parent_key in parent_keys: |
|
400 |
num = self._num_needed_children[parent_key] |
|
|
4454.3.58
by John Arbash Meinel
Enable the new annotator for gc format repos. |
401 |
num = num - 1 |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
402 |
if num == 0: |
403 |
del self._text_cache[parent_key] |
|
404 |
del self._annotations_cache[parent_key] |
|
405 |
# Do we want to clean up _num_needed_children at this point as
|
|
406 |
# well?
|
|
407 |
self._num_needed_children[parent_key] = num |
|
408 |
||
409 |
def _annotate_one(self, key, text, num_lines): |
|
410 |
this_annotation = (key,) |
|
411 |
# Note: annotations will be mutated by calls to _update_from*
|
|
412 |
annotations = [this_annotation] * num_lines |
|
413 |
parent_keys = self._parent_map[key] |
|
414 |
if parent_keys: |
|
415 |
self._update_from_one_parent(key, annotations, text, parent_keys[0]) |
|
416 |
for parent in parent_keys[1:]: |
|
417 |
self._update_from_other_parents(key, annotations, text, |
|
418 |
this_annotation, parent) |
|
419 |
self._record_annotation(key, parent_keys, annotations) |
|
420 |
||
|
4454.3.61
by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality. |
421 |
def add_special_text(self, key, parent_keys, text): |
422 |
"""Add a specific text to the graph.""" |
|
|
4454.3.63
by John Arbash Meinel
Copy the implementation over to the Pyrex version. |
423 |
self._parent_map[key] = parent_keys |
424 |
self._text_cache[key] = osutils.split_lines(text) |
|
425 |
self._heads_provider = None |
|
|
4454.3.61
by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality. |
426 |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
427 |
def annotate(self, key): |
428 |
"""Return annotated fulltext for the given key.""" |
|
429 |
pb = ui.ui_factory.nested_progress_bar() |
|
430 |
try: |
|
431 |
for text_key, text, num_lines in self._get_needed_texts(key, pb=pb): |
|
432 |
self._annotate_one(text_key, text, num_lines) |
|
433 |
finally: |
|
434 |
pb.finished() |
|
435 |
try: |
|
436 |
annotations = self._annotations_cache[key] |
|
437 |
except KeyError: |
|
438 |
raise errors.RevisionNotPresent(key, self._vf) |
|
439 |
return annotations, self._text_cache[key] |
|
440 |
||
441 |
def _get_heads_provider(self): |
|
442 |
if self._heads_provider is None: |
|
443 |
self._heads_provider = _mod_graph.KnownGraph(self._parent_map) |
|
444 |
return self._heads_provider |
|
445 |
||
446 |
def annotate_flat(self, key): |
|
447 |
"""Determine the single-best-revision to source for each line. |
|
448 |
||
449 |
This is meant as a compatibility thunk to how annotate() used to work.
|
|
450 |
"""
|
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
451 |
cdef Py_ssize_t pos, num_lines |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
452 |
annotations, lines = self.annotate(key) |
453 |
assert len(annotations) == len(lines) |
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
454 |
num_lines = len(lines) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
455 |
out = [] |
456 |
heads = self._get_heads_provider().heads |
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
457 |
for pos from 0 <= pos < num_lines: |
458 |
annotation = annotations[pos] |
|
459 |
line = lines[pos] |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
460 |
if len(annotation) == 1: |
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
461 |
head = annotation[0] |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
462 |
else: |
463 |
the_heads = heads(annotation) |
|
464 |
if len(the_heads) == 1: |
|
465 |
for head in the_heads: |
|
466 |
break
|
|
467 |
else: |
|
468 |
# We need to resolve the ambiguity, for now just pick the
|
|
469 |
# sorted smallest
|
|
470 |
head = sorted(the_heads)[0] |
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
471 |
PyList_Append(out, (head, line)) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
472 |
return out |