bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Functionality for doing annotations in the 'optimal' way"""
|
|
18 |
||
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
19 |
cdef extern from "python-compat.h": |
20 |
pass
|
|
21 |
||
22 |
cdef extern from "Python.h": |
|
23 |
ctypedef int Py_ssize_t |
|
24 |
ctypedef struct PyObject: |
|
25 |
pass
|
|
26 |
int PyList_CheckExact(object) |
|
27 |
PyObject *PyList_GET_ITEM(object, Py_ssize_t o) |
|
28 |
Py_ssize_t PyList_GET_SIZE(object) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
29 |
int PyList_Append(object, object) except -1 |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
30 |
int PyList_SetItem(object, Py_ssize_t o, object) except -1 |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
31 |
int PyList_Sort(object) except -1 |
32 |
||
33 |
int PyTuple_CheckExact(object) |
|
34 |
object PyTuple_New(Py_ssize_t len) |
|
35 |
void PyTuple_SET_ITEM(object, Py_ssize_t pos, object) |
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
36 |
void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t, |
37 |
PyObject *) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
38 |
int PyTuple_Resize(PyObject **, Py_ssize_t newlen) |
39 |
PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o) |
|
40 |
Py_ssize_t PyTuple_GET_SIZE(object) |
|
41 |
||
42 |
PyObject *PyDict_GetItem(object d, object k) |
|
43 |
int PyDict_SetItem(object d, object k, object v) except -1 |
|
44 |
||
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
45 |
void Py_INCREF(object) |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
46 |
void Py_INCREF_ptr "Py_INCREF" (PyObject *) |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
47 |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
48 |
int Py_EQ |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
49 |
int Py_LT |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
50 |
int PyObject_RichCompareBool(object, object, int opid) except -1 |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
51 |
int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" ( |
52 |
PyObject *, PyObject *, int opid) |
|
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
53 |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
54 |
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui |
55 |
||
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
56 |
import time |
57 |
||
58 |
||
59 |
counters = {} |
|
60 |
cdef object _counters |
|
61 |
_counters = counters |
|
62 |
||
63 |
cdef _update_counter(name, value): |
|
64 |
_counters[name] = value + _counters.setdefault(name, 0) |
|
65 |
||
|
4454.3.48
by John Arbash Meinel
More debug timing. |
66 |
def update_counter(name, value): |
67 |
_update_counter(name, value) |
|
68 |
||
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
69 |
cdef object c |
70 |
c = time.clock |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
71 |
|
72 |
cdef class _NeededTextIterator: |
|
73 |
||
74 |
cdef object counter |
|
75 |
cdef object text_cache |
|
76 |
cdef object stream |
|
77 |
cdef object stream_len |
|
78 |
cdef object pb |
|
79 |
||
80 |
def __init__(self, stream, text_cache, stream_len, pb=None): |
|
81 |
self.counter = 0 |
|
82 |
self.stream = stream |
|
83 |
self.stream_len = stream_len |
|
84 |
self.text_cache = text_cache |
|
85 |
self.stream_len = stream_len |
|
86 |
self.pb = pb |
|
87 |
||
88 |
def __iter__(self): |
|
89 |
return self |
|
90 |
||
91 |
def __next__(self): |
|
92 |
record = self.stream.next() |
|
93 |
if self.pb is not None: |
|
94 |
self.pb.update('extracting', self.counter, self.stream_len) |
|
95 |
self.counter = self.counter + 1 |
|
96 |
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) |
|
97 |
num_lines = len(lines) |
|
98 |
self.text_cache[record.key] = lines |
|
99 |
return record.key, lines, num_lines |
|
100 |
||
101 |
||
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
102 |
cdef int _check_annotations_are_lists(annotations, |
103 |
parent_annotations) except -1: |
|
104 |
if not PyList_CheckExact(annotations): |
|
105 |
raise TypeError('annotations must be a list') |
|
106 |
if not PyList_CheckExact(parent_annotations): |
|
107 |
raise TypeError('parent_annotations must be a list') |
|
108 |
return 0 |
|
109 |
||
110 |
||
111 |
cdef int _check_match_ranges(parent_annotations, annotations, |
|
112 |
Py_ssize_t parent_idx, Py_ssize_t lines_idx, |
|
113 |
Py_ssize_t match_len) except -1: |
|
114 |
if parent_idx + match_len > PyList_GET_SIZE(parent_annotations): |
|
115 |
raise ValueError('Match length exceeds len of' |
|
116 |
' parent_annotations %s > %s' |
|
117 |
% (parent_idx + match_len, |
|
118 |
PyList_GET_SIZE(parent_annotations))) |
|
119 |
if lines_idx + match_len > PyList_GET_SIZE(annotations): |
|
120 |
raise ValueError('Match length exceeds len of' |
|
121 |
' annotations %s > %s' |
|
122 |
% (lines_idx + match_len, |
|
123 |
PyList_GET_SIZE(annotations))) |
|
124 |
return 0 |
|
125 |
||
126 |
||
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
127 |
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): |
128 |
pos[0] = pos[0] + 1 |
|
129 |
if pos[0] >= PyTuple_GET_SIZE(tpl): |
|
130 |
return NULL |
|
131 |
return PyTuple_GET_ITEM(tpl, pos[0]) |
|
132 |
||
133 |
||
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
134 |
cdef object _combine_annotations(ann_one, ann_two, cache): |
135 |
"""Combine the annotations from both sides.""" |
|
136 |
cdef Py_ssize_t pos_one, pos_two, len_one, len_two |
|
137 |
cdef Py_ssize_t out_pos |
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
138 |
cdef PyObject *temp, *left, *right |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
139 |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
140 |
if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)): |
141 |
cache_key = (ann_one, ann_two) |
|
142 |
else: |
|
143 |
cache_key = (ann_two, ann_one) |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
144 |
temp = PyDict_GetItem(cache, cache_key) |
145 |
if temp != NULL: |
|
146 |
return <object>temp |
|
147 |
||
148 |
if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two): |
|
149 |
raise TypeError('annotations must be tuples') |
|
150 |
# We know that annotations are tuples, and that both sides are already
|
|
151 |
# sorted, so we can just walk and update a new list.
|
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
152 |
pos_one = -1 |
153 |
pos_two = -1 |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
154 |
out_pos = 0 |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
155 |
left = _next_tuple_entry(ann_one, &pos_one) |
156 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
157 |
new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one) |
|
158 |
+ PyTuple_GET_SIZE(ann_two)) |
|
159 |
while left != NULL and right != NULL: |
|
160 |
if (PyObject_RichCompareBool_ptr(left, right, Py_EQ)): |
|
161 |
# Identical values, step both
|
|
162 |
Py_INCREF_ptr(left) |
|
163 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
164 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
165 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
166 |
elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)): |
|
167 |
# left < right or right == NULL
|
|
168 |
Py_INCREF_ptr(left) |
|
169 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
170 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
171 |
else: # right < left or left == NULL |
|
172 |
Py_INCREF_ptr(right) |
|
173 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
174 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
175 |
out_pos = out_pos + 1 |
|
176 |
while left != NULL: |
|
177 |
Py_INCREF_ptr(left) |
|
178 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
179 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
180 |
out_pos = out_pos + 1 |
|
181 |
while right != NULL: |
|
182 |
Py_INCREF_ptr(right) |
|
183 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
184 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
185 |
out_pos = out_pos + 1 |
|
186 |
if out_pos != PyTuple_GET_SIZE(new_ann): |
|
187 |
# Timing _PyTuple_Resize was not significantly faster that slicing
|
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
188 |
# PyTuple_Resize((<PyObject **>new_ann), out_pos)
|
189 |
new_ann = new_ann[0:out_pos] |
|
190 |
PyDict_SetItem(cache, cache_key, new_ann) |
|
191 |
return new_ann |
|
192 |
||
193 |
||
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
194 |
class Annotator: |
195 |
"""Class that drives performing annotations.""" |
|
196 |
||
197 |
def __init__(self, vf): |
|
198 |
"""Create a new Annotator from a VersionedFile.""" |
|
199 |
self._vf = vf |
|
200 |
self._parent_map = {} |
|
201 |
self._text_cache = {} |
|
202 |
# Map from key => number of nexts that will be built from this key
|
|
203 |
self._num_needed_children = {} |
|
204 |
self._annotations_cache = {} |
|
205 |
self._heads_provider = None |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
206 |
self._ann_tuple_cache = {} |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
207 |
|
208 |
def _get_needed_keys(self, key): |
|
209 |
graph = _mod_graph.Graph(self._vf) |
|
210 |
parent_map = {} |
|
211 |
# We need 1 extra copy of the node we will be looking at when we are
|
|
212 |
# done
|
|
213 |
self._num_needed_children[key] = 1 |
|
214 |
for key, parent_keys in graph.iter_ancestry([key]): |
|
215 |
if parent_keys is None: |
|
216 |
continue
|
|
217 |
parent_map[key] = parent_keys |
|
218 |
for parent_key in parent_keys: |
|
219 |
if parent_key in self._num_needed_children: |
|
220 |
self._num_needed_children[parent_key] += 1 |
|
221 |
else: |
|
222 |
self._num_needed_children[parent_key] = 1 |
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
223 |
# _update_counter('num children', 1)
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
224 |
self._parent_map.update(parent_map) |
225 |
# _heads_provider does some graph caching, so it is only valid while
|
|
226 |
# self._parent_map hasn't changed
|
|
227 |
self._heads_provider = None |
|
228 |
keys = parent_map.keys() |
|
229 |
return keys |
|
230 |
||
231 |
def _get_needed_texts(self, key, pb=None): |
|
232 |
"""Get the texts we need to properly annotate key. |
|
233 |
||
234 |
:param key: A Key that is present in self._vf
|
|
235 |
:return: Yield (this_key, text, num_lines)
|
|
236 |
'text' is an opaque object that just has to work with whatever
|
|
237 |
matcher object we are using. Currently it is always 'lines' but
|
|
238 |
future improvements may change this to a simple text string.
|
|
239 |
"""
|
|
240 |
keys = self._get_needed_keys(key) |
|
241 |
if pb is not None: |
|
242 |
pb.update('getting stream', 0, len(keys)) |
|
243 |
stream = self._vf.get_record_stream(keys, 'topological', True) |
|
244 |
iterator = _NeededTextIterator(stream, self._text_cache, len(keys), pb) |
|
245 |
return iterator |
|
246 |
||
247 |
def _get_parent_annotations_and_matches(self, key, text, parent_key): |
|
248 |
"""Get the list of annotations for the parent, and the matching lines. |
|
249 |
||
250 |
:param text: The opaque value given by _get_needed_texts
|
|
251 |
:param parent_key: The key for the parent text
|
|
252 |
:return: (parent_annotations, matching_blocks)
|
|
253 |
parent_annotations is a list as long as the number of lines in
|
|
254 |
parent
|
|
255 |
matching_blocks is a list of (parent_idx, text_idx, len) tuples
|
|
256 |
indicating which lines match between the two texts
|
|
257 |
"""
|
|
258 |
parent_lines = self._text_cache[parent_key] |
|
259 |
parent_annotations = self._annotations_cache[parent_key] |
|
260 |
# PatienceSequenceMatcher should probably be part of Policy
|
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
261 |
# t = c()
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
262 |
matcher = patiencediff.PatienceSequenceMatcher(None, |
263 |
parent_lines, text) |
|
264 |
matching_blocks = matcher.get_matching_blocks() |
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
265 |
# _update_counter('get_matching_blocks()', c() - t)
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
266 |
return parent_annotations, matching_blocks |
267 |
||
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
268 |
def _pyx_update_from_one_parent(self, key, annotations, lines, parent_key): |
269 |
"""Reannotate this text relative to its first parent.""" |
|
270 |
cdef Py_ssize_t parent_idx, lines_idx, match_len, idx |
|
271 |
cdef PyObject *temp |
|
272 |
||
273 |
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches( |
|
274 |
key, lines, parent_key) |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
275 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
276 |
|
277 |
for parent_idx, lines_idx, match_len in matching_blocks: |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
278 |
_check_match_ranges(parent_annotations, annotations, |
279 |
parent_idx, lines_idx, match_len) |
|
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
280 |
for idx from 0 <= idx < match_len: |
281 |
temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx) |
|
282 |
ann = <object>temp |
|
283 |
Py_INCREF(ann) # PyList_SetItem steals a ref |
|
284 |
PyList_SetItem(annotations, lines_idx + idx, ann) |
|
285 |
||
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
286 |
def _update_from_one_parent(self, key, annotations, lines, parent_key): |
287 |
"""Reannotate this text relative to its first parent.""" |
|
288 |
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches( |
|
289 |
key, lines, parent_key) |
|
290 |
||
291 |
for parent_idx, lines_idx, match_len in matching_blocks: |
|
292 |
# For all matching regions we copy across the parent annotations
|
|
293 |
annotations[lines_idx:lines_idx + match_len] = \ |
|
294 |
parent_annotations[parent_idx:parent_idx + match_len] |
|
295 |
||
296 |
def _update_from_other_parents(self, key, annotations, lines, |
|
297 |
this_annotation, parent_key): |
|
298 |
"""Reannotate this text relative to a second (or more) parent.""" |
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
299 |
cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx |
300 |
cdef Py_ssize_t pos |
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
301 |
cdef PyObject *temp, *ann_temp, *par_temp |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
302 |
t1 = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
303 |
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches( |
304 |
key, lines, parent_key) |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
305 |
t2 = c() |
306 |
_update_counter('update other match', t2 - t1) |
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
307 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
308 |
last_ann = None |
309 |
last_parent = None |
|
310 |
last_res = None |
|
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
311 |
cache = self._ann_tuple_cache |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
312 |
for parent_idx, lines_idx, match_len in matching_blocks: |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
313 |
_check_match_ranges(parent_annotations, annotations, |
314 |
parent_idx, lines_idx, match_len) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
315 |
# For lines which match this parent, we will now resolve whether
|
316 |
# this parent wins over the current annotation
|
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
317 |
for idx from 0 <= idx < match_len: |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
318 |
ann_idx = lines_idx + idx |
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
319 |
ann_temp = PyList_GET_ITEM(annotations, ann_idx) |
320 |
par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx) |
|
321 |
if (ann_temp == par_temp): |
|
322 |
# This is parent, do nothing
|
|
323 |
# Pointer comparison is fine here. Value comparison would
|
|
324 |
# be ok, but it will be handled in the final if clause by
|
|
325 |
# merging the two tuples into the same tuple
|
|
326 |
# Avoiding the Py_INCREF by using pointer comparison drops
|
|
327 |
# timing from 215ms => 125ms
|
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
328 |
continue
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
329 |
par_ann = <object>par_temp |
330 |
ann = <object>ann_temp |
|
331 |
if (ann is this_annotation): |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
332 |
# Originally claimed 'this', but it was really in this
|
333 |
# parent
|
|
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
334 |
Py_INCREF(par_ann) |
335 |
PyList_SetItem(annotations, ann_idx, par_ann) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
336 |
continue
|
337 |
# Resolve the fact that both sides have a different value for
|
|
338 |
# last modified
|
|
|
4454.3.52
by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms. |
339 |
if (ann is last_ann and par_ann is last_parent): |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
340 |
Py_INCREF(last_res) |
341 |
PyList_SetItem(annotations, ann_idx, last_res) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
342 |
else: |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
343 |
new_ann = _combine_annotations(ann, par_ann, cache) |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
344 |
Py_INCREF(new_ann) |
345 |
PyList_SetItem(annotations, ann_idx, new_ann) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
346 |
last_ann = ann |
347 |
last_parent = par_ann |
|
348 |
last_res = new_ann |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
349 |
t3 = c() |
350 |
_update_counter('update other resolve', t3 - t2) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
351 |
|
352 |
def _record_annotation(self, key, parent_keys, annotations): |
|
353 |
self._annotations_cache[key] = annotations |
|
354 |
for parent_key in parent_keys: |
|
355 |
num = self._num_needed_children[parent_key] |
|
356 |
num -= 1 |
|
357 |
if num == 0: |
|
358 |
del self._text_cache[parent_key] |
|
359 |
del self._annotations_cache[parent_key] |
|
360 |
# Do we want to clean up _num_needed_children at this point as
|
|
361 |
# well?
|
|
362 |
self._num_needed_children[parent_key] = num |
|
363 |
||
364 |
def _annotate_one(self, key, text, num_lines): |
|
365 |
this_annotation = (key,) |
|
366 |
# Note: annotations will be mutated by calls to _update_from*
|
|
367 |
annotations = [this_annotation] * num_lines |
|
368 |
parent_keys = self._parent_map[key] |
|
369 |
if parent_keys: |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
370 |
t1 = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
371 |
self._update_from_one_parent(key, annotations, text, parent_keys[0]) |
|
4454.3.48
by John Arbash Meinel
More debug timing. |
372 |
_update_counter('left parents', 1) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
373 |
t2 = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
374 |
for parent in parent_keys[1:]: |
375 |
self._update_from_other_parents(key, annotations, text, |
|
376 |
this_annotation, parent) |
|
|
4454.3.48
by John Arbash Meinel
More debug timing. |
377 |
_update_counter('right parents', 1) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
378 |
t3 = c() |
379 |
_update_counter('update left', t2 - t1) |
|
380 |
_update_counter('update rest', t3 - t2) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
381 |
self._record_annotation(key, parent_keys, annotations) |
382 |
||
383 |
def annotate(self, key): |
|
384 |
"""Return annotated fulltext for the given key.""" |
|
385 |
keys = self._get_needed_texts(key) |
|
386 |
pb = ui.ui_factory.nested_progress_bar() |
|
387 |
try: |
|
388 |
for text_key, text, num_lines in self._get_needed_texts(key, pb=pb): |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
389 |
t = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
390 |
self._annotate_one(text_key, text, num_lines) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
391 |
_update_counter('annotate one', c() - t) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
392 |
finally: |
393 |
pb.finished() |
|
394 |
try: |
|
395 |
annotations = self._annotations_cache[key] |
|
396 |
except KeyError: |
|
397 |
raise errors.RevisionNotPresent(key, self._vf) |
|
398 |
return annotations, self._text_cache[key] |
|
399 |
||
400 |
def _get_heads_provider(self): |
|
401 |
if self._heads_provider is None: |
|
402 |
self._heads_provider = _mod_graph.KnownGraph(self._parent_map) |
|
403 |
return self._heads_provider |
|
404 |
||
405 |
def annotate_flat(self, key): |
|
406 |
"""Determine the single-best-revision to source for each line. |
|
407 |
||
408 |
This is meant as a compatibility thunk to how annotate() used to work.
|
|
409 |
"""
|
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
410 |
t_first = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
411 |
annotations, lines = self.annotate(key) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
412 |
_update_counter('annotate time', c() - t_first) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
413 |
assert len(annotations) == len(lines) |
414 |
out = [] |
|
415 |
heads = self._get_heads_provider().heads |
|
416 |
append = out.append |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
417 |
t_second = c() |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
418 |
for annotation, line in zip(annotations, lines): |
419 |
if len(annotation) == 1: |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
420 |
_update_counter('one source', 1) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
421 |
append((annotation[0], line)) |
422 |
else: |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
423 |
_update_counter('multi source', 1) |
424 |
t = c() |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
425 |
the_heads = heads(annotation) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
426 |
_update_counter('heads time', c() - t) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
427 |
if len(the_heads) == 1: |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
428 |
_update_counter('one head', 1) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
429 |
for head in the_heads: |
430 |
break
|
|
431 |
else: |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
432 |
_update_counter('multi heads', 1) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
433 |
# We need to resolve the ambiguity, for now just pick the
|
434 |
# sorted smallest
|
|
435 |
head = sorted(the_heads)[0] |
|
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
436 |
if head == annotation[0]: |
437 |
_update_counter('first ann', 1) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
438 |
append((head, line)) |
|
4454.3.47
by John Arbash Meinel
lots of debugging timers. |
439 |
_update_counter('resolve annotations', c() - t_second) |
440 |
_update_counter('overall', c() - t_first) |
|
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
441 |
return out |