/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Functionality for doing annotations in the 'optimal' way"""
18
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
19
cdef extern from "python-compat.h":
20
    pass
21
22
cdef extern from "Python.h":
23
    ctypedef int Py_ssize_t
24
    ctypedef struct PyObject:
25
        pass
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
26
    ctypedef struct PyListObject:
27
        PyObject **ob_item
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
28
    int PyList_CheckExact(object)
29
    PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
30
    Py_ssize_t PyList_GET_SIZE(object)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
31
    int PyList_Append(object, object) except -1
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
32
    int PyList_SetItem(object, Py_ssize_t o, object) except -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
33
    int PyList_Sort(object) except -1
34
35
    int PyTuple_CheckExact(object)
36
    object PyTuple_New(Py_ssize_t len)
37
    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
38
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
39
                                                  PyObject *)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
40
    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
41
    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
42
    Py_ssize_t PyTuple_GET_SIZE(object)
43
44
    PyObject *PyDict_GetItem(object d, object k)
45
    int PyDict_SetItem(object d, object k, object v) except -1
46
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
47
    void Py_INCREF(object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
48
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
49
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
50
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
51
    int Py_EQ
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
52
    int Py_LT
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
53
    int PyObject_RichCompareBool(object, object, int opid) except -1
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
54
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
55
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
56
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
57
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
58
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
59
60
61
cdef class _NeededTextIterator:
62
63
    cdef object counter
64
    cdef object text_cache
65
    cdef object stream
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
66
    cdef object ann_keys
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
67
    cdef object stream_len
68
    cdef object pb
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
69
    cdef int stream_is_consumed
70
    cdef int ann_key_pos
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
71
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
72
    def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
73
        self.counter = 0
74
        self.stream = stream
75
        self.stream_len = stream_len
76
        self.text_cache = text_cache
77
        self.stream_len = stream_len
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
78
        self.ann_keys = list(ann_keys)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
79
        self.pb = pb
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
80
        self.stream_is_consumed = 0
81
        self.ann_key_pos = 0
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
82
83
    def __iter__(self):
84
        return self
85
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
86
    cdef _get_ann_text(self):
87
        if self.ann_key_pos >= len(self.ann_keys):
88
            raise StopIteration
89
        key = self.ann_keys[self.ann_key_pos]
90
        self.ann_key_pos = self.ann_key_pos + 1
91
        lines = self.text_cache[key]
92
        num_lines = len(lines)
93
        return key, lines, num_lines
94
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
95
    def __next__(self):
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
96
        if self.stream_is_consumed:
97
            return self._get_ann_text()
98
        try:
99
            record = self.stream.next()
100
        except StopIteration:
101
            self.stream_is_consumed = 1
102
            return self._get_ann_text()
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
103
        if self.pb is not None:
104
            self.pb.update('extracting', self.counter, self.stream_len)
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
105
        if record.storage_kind == 'absent':
106
            raise errors.RevisionNotPresent(record.key, None)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
107
        self.counter = self.counter + 1
108
        lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
109
        num_lines = len(lines)
110
        self.text_cache[record.key] = lines
111
        return record.key, lines, num_lines
112
113
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
114
cdef int _check_annotations_are_lists(annotations,
115
                                      parent_annotations) except -1:
116
    if not PyList_CheckExact(annotations):
117
        raise TypeError('annotations must be a list')
118
    if not PyList_CheckExact(parent_annotations):
119
        raise TypeError('parent_annotations must be a list')
120
    return 0
121
122
123
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
124
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
125
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
126
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
127
        raise ValueError('Match length exceeds len of'
128
                         ' parent_annotations %s > %s'
129
                         % (parent_idx + match_len,
130
                            PyList_GET_SIZE(parent_annotations)))
131
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
132
        raise ValueError('Match length exceeds len of'
133
                         ' annotations %s > %s'
134
                         % (lines_idx + match_len,
135
                            PyList_GET_SIZE(annotations)))
136
    return 0
137
138
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
139
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos):
140
    pos[0] = pos[0] + 1
141
    if pos[0] >= PyTuple_GET_SIZE(tpl):
142
        return NULL
143
    return PyTuple_GET_ITEM(tpl, pos[0])
144
145
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
146
cdef object _combine_annotations(ann_one, ann_two, cache):
147
    """Combine the annotations from both sides."""
148
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
149
    cdef Py_ssize_t out_pos
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
150
    cdef PyObject *temp, *left, *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
151
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
152
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
153
        cache_key = (ann_one, ann_two)
154
    else:
155
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
156
    temp = PyDict_GetItem(cache, cache_key)
157
    if temp != NULL:
158
        return <object>temp
159
160
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
161
        raise TypeError('annotations must be tuples')
162
    # We know that annotations are tuples, and that both sides are already
163
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
164
    pos_one = -1
165
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
166
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
167
    left = _next_tuple_entry(ann_one, &pos_one)
168
    right = _next_tuple_entry(ann_two, &pos_two)
169
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
170
                          + PyTuple_GET_SIZE(ann_two))
171
    while left != NULL and right != NULL:
172
        if (PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
173
            # Identical values, step both
174
            Py_INCREF_ptr(left)
175
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
176
            left = _next_tuple_entry(ann_one, &pos_one)
177
            right = _next_tuple_entry(ann_two, &pos_two)
178
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
179
            # left < right or right == NULL
180
            Py_INCREF_ptr(left)
181
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
182
            left = _next_tuple_entry(ann_one, &pos_one)
183
        else: # right < left or left == NULL
184
            Py_INCREF_ptr(right)
185
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
186
            right = _next_tuple_entry(ann_two, &pos_two)
187
        out_pos = out_pos + 1
188
    while left != NULL:
189
        Py_INCREF_ptr(left)
190
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
191
        left = _next_tuple_entry(ann_one, &pos_one)
192
        out_pos = out_pos + 1
193
    while right != NULL:
194
        Py_INCREF_ptr(right)
195
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
196
        right = _next_tuple_entry(ann_two, &pos_two)
197
        out_pos = out_pos + 1
198
    if out_pos != PyTuple_GET_SIZE(new_ann):
199
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
200
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
201
        new_ann = new_ann[0:out_pos]
202
    PyDict_SetItem(cache, cache_key, new_ann)
203
    return new_ann
204
205
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
206
cdef _apply_parent_annotations(annotations, parent_annotations,
207
                               matching_blocks):
208
    """Apply the annotations from parent_annotations into annotations.
209
210
    matching_blocks defines the ranges that match.
211
    """
212
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
213
    cdef PyListObject *par_list, *ann_list
214
    cdef PyObject **par_temp, **ann_temp
215
216
    _check_annotations_are_lists(annotations, parent_annotations)
217
    par_list = <PyListObject *>parent_annotations
218
    ann_list = <PyListObject *>annotations
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
219
    # For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
220
    # across from the parent entry. So this routine is heavily optimized for
221
    # that. Would be interesting if we could use memcpy() but we have to incref
222
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
223
    for parent_idx, lines_idx, match_len in matching_blocks:
224
        _check_match_ranges(parent_annotations, annotations,
225
                            parent_idx, lines_idx, match_len)
226
        par_temp = par_list.ob_item + parent_idx
227
        ann_temp = ann_list.ob_item + lines_idx
228
        for idx from 0 <= idx < match_len:
229
            Py_INCREF_ptr(par_temp[idx])
230
            Py_DECREF_ptr(ann_temp[idx])
231
            ann_temp[idx] = par_temp[idx]
232
233
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
234
class Annotator:
235
    """Class that drives performing annotations."""
236
237
    def __init__(self, vf):
238
        """Create a new Annotator from a VersionedFile."""
239
        self._vf = vf
240
        self._parent_map = {}
241
        self._text_cache = {}
242
        # Map from key => number of nexts that will be built from this key
243
        self._num_needed_children = {}
244
        self._annotations_cache = {}
245
        self._heads_provider = None
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
246
        self._ann_tuple_cache = {}
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
247
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
248
249
    def _update_needed_children(self, key, parent_keys):
250
        for parent_key in parent_keys:
251
            if parent_key in self._num_needed_children:
252
                self._num_needed_children[parent_key] += 1
253
            else:
254
                self._num_needed_children[parent_key] = 1
255
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
256
    def _get_needed_keys(self, key):
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
257
        """Determine the texts we need to get from the backing vf.
258
259
        :return: (vf_keys_needed, ann_keys_needed)
260
            vf_keys_needed  These are keys that we need to get from the vf
261
            ann_keys_needed Texts which we have in self._text_cache but we
262
                            don't have annotations for. We need to yield these
263
                            in the proper order so that we can get proper
264
                            annotations.
265
        """
266
        parent_map = self._parent_map
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
267
        # We need 1 extra copy of the node we will be looking at when we are
268
        # done
269
        self._num_needed_children[key] = 1
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
270
        vf_keys_needed = set()
271
        ann_keys_needed = set()
272
        needed_keys = set([key])
273
        while needed_keys:
274
            parent_lookup = []
275
            next_parent_map = {}
276
            for key in needed_keys:
277
                if key in self._parent_map:
278
                    # We don't need to lookup this key in the vf
279
                    if key not in self._text_cache:
280
                        # Extract this text from the vf
281
                        vf_keys_needed.add(key)
282
                    elif key not in self._annotations_cache:
283
                        # We do need to annotate
284
                        ann_keys_needed.add(key)
285
                        next_parent_map[key] = self._parent_map[key]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
286
                else:
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
287
                    parent_lookup.append(key)
288
                    vf_keys_needed.add(key)
289
            needed_keys = set()
290
            next_parent_map.update(self._vf.get_parent_map(parent_lookup))
291
            for key, parent_keys in next_parent_map.iteritems():
292
                self._update_needed_children(key, parent_keys)
293
                for key in parent_keys:
294
                    if key not in parent_map:
295
                        needed_keys.add(key)
296
            parent_map.update(next_parent_map)
297
            # _heads_provider does some graph caching, so it is only valid while
298
            # self._parent_map hasn't changed
299
            self._heads_provider = None
300
        return vf_keys_needed, ann_keys_needed
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
301
302
    def _get_needed_texts(self, key, pb=None):
303
        """Get the texts we need to properly annotate key.
304
305
        :param key: A Key that is present in self._vf
306
        :return: Yield (this_key, text, num_lines)
307
            'text' is an opaque object that just has to work with whatever
308
            matcher object we are using. Currently it is always 'lines' but
309
            future improvements may change this to a simple text string.
310
        """
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
311
        keys, ann_keys = self._get_needed_keys(key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
312
        if pb is not None:
313
            pb.update('getting stream', 0, len(keys))
314
        stream  = self._vf.get_record_stream(keys, 'topological', True)
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
315
        iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
316
                                       ann_keys, pb)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
317
        return iterator
318
319
    def _get_parent_annotations_and_matches(self, key, text, parent_key):
320
        """Get the list of annotations for the parent, and the matching lines.
321
322
        :param text: The opaque value given by _get_needed_texts
323
        :param parent_key: The key for the parent text
324
        :return: (parent_annotations, matching_blocks)
325
            parent_annotations is a list as long as the number of lines in
326
                parent
327
            matching_blocks is a list of (parent_idx, text_idx, len) tuples
328
                indicating which lines match between the two texts
329
        """
330
        parent_lines = self._text_cache[parent_key]
331
        parent_annotations = self._annotations_cache[parent_key]
332
        # PatienceSequenceMatcher should probably be part of Policy
333
        matcher = patiencediff.PatienceSequenceMatcher(None,
334
            parent_lines, text)
335
        matching_blocks = matcher.get_matching_blocks()
336
        return parent_annotations, matching_blocks
337
338
    def _update_from_one_parent(self, key, annotations, lines, parent_key):
339
        """Reannotate this text relative to its first parent."""
340
        parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
341
            key, lines, parent_key)
342
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
343
        _apply_parent_annotations(annotations, parent_annotations,
344
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
345
346
    def _update_from_other_parents(self, key, annotations, lines,
347
                                   this_annotation, parent_key):
348
        """Reannotate this text relative to a second (or more) parent."""
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
349
        cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
350
        cdef Py_ssize_t pos
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
351
        cdef PyObject *ann_temp, *par_temp
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
352
        parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
353
            key, lines, parent_key)
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
354
        _check_annotations_are_lists(annotations, parent_annotations)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
355
        last_ann = None
356
        last_parent = None
357
        last_res = None
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
358
        cache = self._ann_tuple_cache
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
359
        for parent_idx, lines_idx, match_len in matching_blocks:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
360
            _check_match_ranges(parent_annotations, annotations,
361
                                parent_idx, lines_idx, match_len)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
362
            # For lines which match this parent, we will now resolve whether
363
            # this parent wins over the current annotation
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
364
            for idx from 0 <= idx < match_len:
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
365
                ann_idx = lines_idx + idx
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
366
                ann_temp = PyList_GET_ITEM(annotations, ann_idx)
367
                par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
368
                if (ann_temp == par_temp):
369
                    # This is parent, do nothing
370
                    # Pointer comparison is fine here. Value comparison would
371
                    # be ok, but it will be handled in the final if clause by
372
                    # merging the two tuples into the same tuple
373
                    # Avoiding the Py_INCREF by using pointer comparison drops
374
                    # timing from 215ms => 125ms
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
375
                    continue
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
376
                par_ann = <object>par_temp
377
                ann = <object>ann_temp
378
                if (ann is this_annotation):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
379
                    # Originally claimed 'this', but it was really in this
380
                    # parent
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
381
                    Py_INCREF(par_ann)
382
                    PyList_SetItem(annotations, ann_idx, par_ann)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
383
                    continue
384
                # Resolve the fact that both sides have a different value for
385
                # last modified
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
386
                if (ann is last_ann and par_ann is last_parent):
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
387
                    Py_INCREF(last_res)
388
                    PyList_SetItem(annotations, ann_idx, last_res)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
389
                else:
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
390
                    new_ann = _combine_annotations(ann, par_ann, cache)
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
391
                    Py_INCREF(new_ann)
392
                    PyList_SetItem(annotations, ann_idx, new_ann)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
393
                    last_ann = ann
394
                    last_parent = par_ann
395
                    last_res = new_ann
396
397
    def _record_annotation(self, key, parent_keys, annotations):
398
        self._annotations_cache[key] = annotations
399
        for parent_key in parent_keys:
400
            num = self._num_needed_children[parent_key]
4454.3.58 by John Arbash Meinel
Enable the new annotator for gc format repos.
401
            num = num - 1
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
402
            if num == 0:
403
                del self._text_cache[parent_key]
404
                del self._annotations_cache[parent_key]
405
                # Do we want to clean up _num_needed_children at this point as
406
                # well?
407
            self._num_needed_children[parent_key] = num
408
409
    def _annotate_one(self, key, text, num_lines):
410
        this_annotation = (key,)
411
        # Note: annotations will be mutated by calls to _update_from*
412
        annotations = [this_annotation] * num_lines
413
        parent_keys = self._parent_map[key]
414
        if parent_keys:
415
            self._update_from_one_parent(key, annotations, text, parent_keys[0])
416
            for parent in parent_keys[1:]:
417
                self._update_from_other_parents(key, annotations, text,
418
                                                this_annotation, parent)
419
        self._record_annotation(key, parent_keys, annotations)
420
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
421
    def add_special_text(self, key, parent_keys, text):
422
        """Add a specific text to the graph."""
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
423
        self._parent_map[key] = parent_keys
424
        self._text_cache[key] = osutils.split_lines(text)
425
        self._heads_provider = None
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
426
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
427
    def annotate(self, key):
428
        """Return annotated fulltext for the given key."""
429
        pb = ui.ui_factory.nested_progress_bar()
430
        try:
431
            for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
432
                self._annotate_one(text_key, text, num_lines)
433
        finally:
434
            pb.finished()
435
        try:
436
            annotations = self._annotations_cache[key]
437
        except KeyError:
438
            raise errors.RevisionNotPresent(key, self._vf)
439
        return annotations, self._text_cache[key]
440
441
    def _get_heads_provider(self):
442
        if self._heads_provider is None:
443
            self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
444
        return self._heads_provider
445
446
    def annotate_flat(self, key):
447
        """Determine the single-best-revision to source for each line.
448
449
        This is meant as a compatibility thunk to how annotate() used to work.
450
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
451
        cdef Py_ssize_t pos, num_lines
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
452
        annotations, lines = self.annotate(key)
453
        assert len(annotations) == len(lines)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
454
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
455
        out = []
456
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
457
        for pos from 0 <= pos < num_lines:
458
            annotation = annotations[pos]
459
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
460
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
461
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
462
            else:
463
                the_heads = heads(annotation)
464
                if len(the_heads) == 1:
465
                    for head in the_heads:
466
                        break
467
                else:
468
                    # We need to resolve the ambiguity, for now just pick the
469
                    # sorted smallest
470
                    head = sorted(the_heads)[0]
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
471
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
472
        return out