/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Functionality for doing annotations in the 'optimal' way"""
18
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
19
cdef extern from "python-compat.h":
20
    pass
21
22
cdef extern from "Python.h":
23
    ctypedef int Py_ssize_t
24
    ctypedef struct PyObject:
25
        pass
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
26
    ctypedef struct PyListObject:
27
        PyObject **ob_item
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
28
    int PyList_CheckExact(object)
29
    PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
30
    Py_ssize_t PyList_GET_SIZE(object)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
31
    int PyList_Append(object, object) except -1
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
32
    int PyList_SetItem(object, Py_ssize_t o, object) except -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
33
    int PyList_Sort(object) except -1
34
35
    int PyTuple_CheckExact(object)
36
    object PyTuple_New(Py_ssize_t len)
37
    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
38
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
39
                                                  PyObject *)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
40
    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
41
    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
42
    Py_ssize_t PyTuple_GET_SIZE(object)
43
44
    PyObject *PyDict_GetItem(object d, object k)
45
    int PyDict_SetItem(object d, object k, object v) except -1
46
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
47
    void Py_INCREF(object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
48
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
49
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
50
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
51
    int Py_EQ
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
52
    int Py_LT
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
53
    int PyObject_RichCompareBool(object, object, int opid) except -1
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
54
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
55
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
56
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
57
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
58
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
59
60
61
cdef class _NeededTextIterator:
62
63
    cdef object counter
64
    cdef object text_cache
65
    cdef object stream
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
66
    cdef object ann_keys
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
67
    cdef object stream_len
68
    cdef object pb
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
69
    cdef int stream_is_consumed
70
    cdef int ann_key_pos
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
71
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
72
    def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
73
        self.counter = 0
74
        self.stream = stream
75
        self.stream_len = stream_len
76
        self.text_cache = text_cache
77
        self.stream_len = stream_len
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
78
        self.ann_keys = list(ann_keys)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
79
        self.pb = pb
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
80
        self.stream_is_consumed = 0
81
        self.ann_key_pos = 0
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
82
83
    def __iter__(self):
84
        return self
85
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
86
    cdef _get_ann_text(self):
87
        if self.ann_key_pos >= len(self.ann_keys):
88
            raise StopIteration
89
        key = self.ann_keys[self.ann_key_pos]
90
        self.ann_key_pos = self.ann_key_pos + 1
91
        lines = self.text_cache[key]
92
        num_lines = len(lines)
93
        return key, lines, num_lines
94
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
95
    def __next__(self):
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
96
        if self.stream_is_consumed:
97
            return self._get_ann_text()
98
        try:
99
            record = self.stream.next()
100
        except StopIteration:
101
            self.stream_is_consumed = 1
102
            return self._get_ann_text()
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
103
        if self.pb is not None:
104
            self.pb.update('extracting', self.counter, self.stream_len)
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
105
        if record.storage_kind == 'absent':
106
            raise errors.RevisionNotPresent(record.key, None)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
107
        self.counter = self.counter + 1
108
        lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
109
        num_lines = len(lines)
110
        self.text_cache[record.key] = lines
111
        return record.key, lines, num_lines
112
113
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
114
cdef int _check_annotations_are_lists(annotations,
115
                                      parent_annotations) except -1:
116
    if not PyList_CheckExact(annotations):
117
        raise TypeError('annotations must be a list')
118
    if not PyList_CheckExact(parent_annotations):
119
        raise TypeError('parent_annotations must be a list')
120
    return 0
121
122
123
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
124
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
125
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
126
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
127
        raise ValueError('Match length exceeds len of'
128
                         ' parent_annotations %s > %s'
129
                         % (parent_idx + match_len,
130
                            PyList_GET_SIZE(parent_annotations)))
131
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
132
        raise ValueError('Match length exceeds len of'
133
                         ' annotations %s > %s'
134
                         % (lines_idx + match_len,
135
                            PyList_GET_SIZE(annotations)))
136
    return 0
137
138
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
139
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos):
140
    pos[0] = pos[0] + 1
141
    if pos[0] >= PyTuple_GET_SIZE(tpl):
142
        return NULL
143
    return PyTuple_GET_ITEM(tpl, pos[0])
144
145
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
146
cdef object _combine_annotations(ann_one, ann_two, cache):
147
    """Combine the annotations from both sides."""
148
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
149
    cdef Py_ssize_t out_pos
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
150
    cdef PyObject *temp, *left, *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
151
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
152
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
153
        cache_key = (ann_one, ann_two)
154
    else:
155
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
156
    temp = PyDict_GetItem(cache, cache_key)
157
    if temp != NULL:
158
        return <object>temp
159
160
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
161
        raise TypeError('annotations must be tuples')
162
    # We know that annotations are tuples, and that both sides are already
163
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
164
    pos_one = -1
165
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
166
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
167
    left = _next_tuple_entry(ann_one, &pos_one)
168
    right = _next_tuple_entry(ann_two, &pos_two)
169
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
170
                          + PyTuple_GET_SIZE(ann_two))
171
    while left != NULL and right != NULL:
172
        if (PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
173
            # Identical values, step both
174
            Py_INCREF_ptr(left)
175
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
176
            left = _next_tuple_entry(ann_one, &pos_one)
177
            right = _next_tuple_entry(ann_two, &pos_two)
178
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
179
            # left < right or right == NULL
180
            Py_INCREF_ptr(left)
181
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
182
            left = _next_tuple_entry(ann_one, &pos_one)
183
        else: # right < left or left == NULL
184
            Py_INCREF_ptr(right)
185
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
186
            right = _next_tuple_entry(ann_two, &pos_two)
187
        out_pos = out_pos + 1
188
    while left != NULL:
189
        Py_INCREF_ptr(left)
190
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
191
        left = _next_tuple_entry(ann_one, &pos_one)
192
        out_pos = out_pos + 1
193
    while right != NULL:
194
        Py_INCREF_ptr(right)
195
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
196
        right = _next_tuple_entry(ann_two, &pos_two)
197
        out_pos = out_pos + 1
198
    if out_pos != PyTuple_GET_SIZE(new_ann):
199
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
200
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
201
        new_ann = new_ann[0:out_pos]
202
    PyDict_SetItem(cache, cache_key, new_ann)
203
    return new_ann
204
205
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
206
cdef _apply_parent_annotations(annotations, parent_annotations,
207
                               matching_blocks):
208
    """Apply the annotations from parent_annotations into annotations.
209
210
    matching_blocks defines the ranges that match.
211
    """
212
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
213
    cdef PyListObject *par_list, *ann_list
214
    cdef PyObject **par_temp, **ann_temp
215
216
    _check_annotations_are_lists(annotations, parent_annotations)
217
    par_list = <PyListObject *>parent_annotations
218
    ann_list = <PyListObject *>annotations
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
219
    # For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
220
    # across from the parent entry. So this routine is heavily optimized for
221
    # that. Would be interesting if we could use memcpy() but we have to incref
222
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
223
    for parent_idx, lines_idx, match_len in matching_blocks:
224
        _check_match_ranges(parent_annotations, annotations,
225
                            parent_idx, lines_idx, match_len)
226
        par_temp = par_list.ob_item + parent_idx
227
        ann_temp = ann_list.ob_item + lines_idx
228
        for idx from 0 <= idx < match_len:
229
            Py_INCREF_ptr(par_temp[idx])
230
            Py_DECREF_ptr(ann_temp[idx])
231
            ann_temp[idx] = par_temp[idx]
232
233
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
234
class Annotator:
235
    """Class that drives performing annotations."""
236
237
    def __init__(self, vf):
238
        """Create a new Annotator from a VersionedFile."""
239
        self._vf = vf
240
        self._parent_map = {}
241
        self._text_cache = {}
242
        # Map from key => number of nexts that will be built from this key
243
        self._num_needed_children = {}
244
        self._annotations_cache = {}
245
        self._heads_provider = None
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
246
        self._ann_tuple_cache = {}
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
247
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
248
249
    def _update_needed_children(self, key, parent_keys):
250
        for parent_key in parent_keys:
251
            if parent_key in self._num_needed_children:
252
                self._num_needed_children[parent_key] += 1
253
            else:
254
                self._num_needed_children[parent_key] = 1
255
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
256
    def _get_needed_keys(self, key):
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
257
        """Determine the texts we need to get from the backing vf.
258
259
        :return: (vf_keys_needed, ann_keys_needed)
260
            vf_keys_needed  These are keys that we need to get from the vf
261
            ann_keys_needed Texts which we have in self._text_cache but we
262
                            don't have annotations for. We need to yield these
263
                            in the proper order so that we can get proper
264
                            annotations.
265
        """
266
        parent_map = self._parent_map
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
267
        # We need 1 extra copy of the node we will be looking at when we are
268
        # done
269
        self._num_needed_children[key] = 1
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
270
        vf_keys_needed = set()
271
        ann_keys_needed = set()
272
        needed_keys = set([key])
273
        while needed_keys:
274
            parent_lookup = []
275
            next_parent_map = {}
276
            for key in needed_keys:
277
                if key in self._parent_map:
278
                    # We don't need to lookup this key in the vf
279
                    if key not in self._text_cache:
280
                        # Extract this text from the vf
281
                        vf_keys_needed.add(key)
282
                    elif key not in self._annotations_cache:
283
                        # We do need to annotate
284
                        ann_keys_needed.add(key)
285
                        next_parent_map[key] = self._parent_map[key]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
286
                else:
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
287
                    parent_lookup.append(key)
288
                    vf_keys_needed.add(key)
289
            needed_keys = set()
290
            next_parent_map.update(self._vf.get_parent_map(parent_lookup))
291
            for key, parent_keys in next_parent_map.iteritems():
4454.3.65 by John Arbash Meinel
Tests that VF implementations support .get_annotator()
292
                if parent_keys is None:
293
                    parent_keys = ()
294
                    next_parent_map[key] = ()
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
295
                self._update_needed_children(key, parent_keys)
296
                for key in parent_keys:
297
                    if key not in parent_map:
298
                        needed_keys.add(key)
299
            parent_map.update(next_parent_map)
300
            # _heads_provider does some graph caching, so it is only valid while
301
            # self._parent_map hasn't changed
302
            self._heads_provider = None
303
        return vf_keys_needed, ann_keys_needed
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
304
305
    def _get_needed_texts(self, key, pb=None):
306
        """Get the texts we need to properly annotate key.
307
308
        :param key: A Key that is present in self._vf
309
        :return: Yield (this_key, text, num_lines)
310
            'text' is an opaque object that just has to work with whatever
311
            matcher object we are using. Currently it is always 'lines' but
312
            future improvements may change this to a simple text string.
313
        """
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
314
        keys, ann_keys = self._get_needed_keys(key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
315
        if pb is not None:
316
            pb.update('getting stream', 0, len(keys))
317
        stream  = self._vf.get_record_stream(keys, 'topological', True)
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
318
        iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
319
                                       ann_keys, pb)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
320
        return iterator
321
322
    def _get_parent_annotations_and_matches(self, key, text, parent_key):
323
        """Get the list of annotations for the parent, and the matching lines.
324
325
        :param text: The opaque value given by _get_needed_texts
326
        :param parent_key: The key for the parent text
327
        :return: (parent_annotations, matching_blocks)
328
            parent_annotations is a list as long as the number of lines in
329
                parent
330
            matching_blocks is a list of (parent_idx, text_idx, len) tuples
331
                indicating which lines match between the two texts
332
        """
333
        parent_lines = self._text_cache[parent_key]
334
        parent_annotations = self._annotations_cache[parent_key]
335
        # PatienceSequenceMatcher should probably be part of Policy
336
        matcher = patiencediff.PatienceSequenceMatcher(None,
337
            parent_lines, text)
338
        matching_blocks = matcher.get_matching_blocks()
339
        return parent_annotations, matching_blocks
340
341
    def _update_from_one_parent(self, key, annotations, lines, parent_key):
342
        """Reannotate this text relative to its first parent."""
343
        parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
344
            key, lines, parent_key)
345
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
346
        _apply_parent_annotations(annotations, parent_annotations,
347
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
348
349
    def _update_from_other_parents(self, key, annotations, lines,
350
                                   this_annotation, parent_key):
351
        """Reannotate this text relative to a second (or more) parent."""
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
352
        cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
353
        cdef Py_ssize_t pos
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
354
        cdef PyObject *ann_temp, *par_temp
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
355
        parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
356
            key, lines, parent_key)
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
357
        _check_annotations_are_lists(annotations, parent_annotations)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
358
        last_ann = None
359
        last_parent = None
360
        last_res = None
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
361
        cache = self._ann_tuple_cache
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
362
        for parent_idx, lines_idx, match_len in matching_blocks:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
363
            _check_match_ranges(parent_annotations, annotations,
364
                                parent_idx, lines_idx, match_len)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
365
            # For lines which match this parent, we will now resolve whether
366
            # this parent wins over the current annotation
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
367
            for idx from 0 <= idx < match_len:
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
368
                ann_idx = lines_idx + idx
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
369
                ann_temp = PyList_GET_ITEM(annotations, ann_idx)
370
                par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
371
                if (ann_temp == par_temp):
372
                    # This is parent, do nothing
373
                    # Pointer comparison is fine here. Value comparison would
374
                    # be ok, but it will be handled in the final if clause by
375
                    # merging the two tuples into the same tuple
376
                    # Avoiding the Py_INCREF by using pointer comparison drops
377
                    # timing from 215ms => 125ms
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
378
                    continue
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
379
                par_ann = <object>par_temp
380
                ann = <object>ann_temp
381
                if (ann is this_annotation):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
382
                    # Originally claimed 'this', but it was really in this
383
                    # parent
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
384
                    Py_INCREF(par_ann)
385
                    PyList_SetItem(annotations, ann_idx, par_ann)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
386
                    continue
387
                # Resolve the fact that both sides have a different value for
388
                # last modified
4454.3.52 by John Arbash Meinel
Tune the inner resolution loop a bit. Down to 125ms from 200+ms.
389
                if (ann is last_ann and par_ann is last_parent):
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
390
                    Py_INCREF(last_res)
391
                    PyList_SetItem(annotations, ann_idx, last_res)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
392
                else:
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
393
                    new_ann = _combine_annotations(ann, par_ann, cache)
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
394
                    Py_INCREF(new_ann)
395
                    PyList_SetItem(annotations, ann_idx, new_ann)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
396
                    last_ann = ann
397
                    last_parent = par_ann
398
                    last_res = new_ann
399
400
    def _record_annotation(self, key, parent_keys, annotations):
401
        self._annotations_cache[key] = annotations
402
        for parent_key in parent_keys:
403
            num = self._num_needed_children[parent_key]
4454.3.58 by John Arbash Meinel
Enable the new annotator for gc format repos.
404
            num = num - 1
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
405
            if num == 0:
406
                del self._text_cache[parent_key]
407
                del self._annotations_cache[parent_key]
408
                # Do we want to clean up _num_needed_children at this point as
409
                # well?
410
            self._num_needed_children[parent_key] = num
411
412
    def _annotate_one(self, key, text, num_lines):
413
        this_annotation = (key,)
414
        # Note: annotations will be mutated by calls to _update_from*
415
        annotations = [this_annotation] * num_lines
416
        parent_keys = self._parent_map[key]
417
        if parent_keys:
418
            self._update_from_one_parent(key, annotations, text, parent_keys[0])
419
            for parent in parent_keys[1:]:
420
                self._update_from_other_parents(key, annotations, text,
421
                                                this_annotation, parent)
422
        self._record_annotation(key, parent_keys, annotations)
423
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
424
    def add_special_text(self, key, parent_keys, text):
425
        """Add a specific text to the graph."""
4454.3.63 by John Arbash Meinel
Copy the implementation over to the Pyrex version.
426
        self._parent_map[key] = parent_keys
427
        self._text_cache[key] = osutils.split_lines(text)
428
        self._heads_provider = None
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
429
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
430
    def annotate(self, key):
431
        """Return annotated fulltext for the given key."""
432
        pb = ui.ui_factory.nested_progress_bar()
433
        try:
434
            for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
435
                self._annotate_one(text_key, text, num_lines)
436
        finally:
437
            pb.finished()
438
        try:
439
            annotations = self._annotations_cache[key]
440
        except KeyError:
441
            raise errors.RevisionNotPresent(key, self._vf)
442
        return annotations, self._text_cache[key]
443
444
    def _get_heads_provider(self):
445
        if self._heads_provider is None:
446
            self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
447
        return self._heads_provider
448
449
    def annotate_flat(self, key):
450
        """Determine the single-best-revision to source for each line.
451
452
        This is meant as a compatibility thunk to how annotate() used to work.
453
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
454
        cdef Py_ssize_t pos, num_lines
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
455
        annotations, lines = self.annotate(key)
456
        assert len(annotations) == len(lines)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
457
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
458
        out = []
459
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
460
        for pos from 0 <= pos < num_lines:
461
            annotation = annotations[pos]
462
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
463
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
464
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
465
            else:
466
                the_heads = heads(annotation)
467
                if len(the_heads) == 1:
468
                    for head in the_heads:
469
                        break
470
                else:
471
                    # We need to resolve the ambiguity, for now just pick the
472
                    # sorted smallest
473
                    head = sorted(the_heads)[0]
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
474
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
475
        return out