/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7509.1.1 by Jelmer Vernooij
Set cython language level to Python 3.
16
#
17
# cython: language_level=3
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
18
19
"""Functionality for doing annotations in the 'optimal' way"""
20
6656.2.2 by Jelmer Vernooij
Use absolute_import.
21
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
22
cdef extern from "python-compat.h":
23
    pass
24
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
25
from cpython.dict cimport (
26
    PyDict_GetItem,
27
    PyDict_SetItem,
28
    )
29
from cpython.list cimport (
30
    PyList_Append,
31
    PyList_CheckExact,
32
    PyList_GET_ITEM,
33
    PyList_GET_SIZE,
34
    PyList_SetItem,
35
    PyList_Sort,
36
    )
37
from cpython.object cimport (
38
    Py_EQ,
39
    Py_LT,
40
    PyObject,
41
    PyObject_RichCompareBool,
42
    )
43
from cpython.ref cimport (
44
    Py_INCREF,
45
    )
46
from cpython.tuple cimport (
47
    PyTuple_CheckExact,
48
    PyTuple_GET_ITEM,
49
    PyTuple_GET_SIZE,
50
    PyTuple_New,
51
    PyTuple_SET_ITEM,
52
    )
53
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
54
cdef extern from "Python.h":
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
55
    ctypedef struct PyListObject:
56
        PyObject **ob_item
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
57
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
58
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
59
                                                  PyObject *)
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
60
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
61
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
62
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
63
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
64
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
65
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
66
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
67
6656.2.2 by Jelmer Vernooij
Use absolute_import.
68
from . import _annotator_py
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
69
70
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
71
cdef int _check_annotations_are_lists(annotations,
72
                                      parent_annotations) except -1:
73
    if not PyList_CheckExact(annotations):
74
        raise TypeError('annotations must be a list')
75
    if not PyList_CheckExact(parent_annotations):
76
        raise TypeError('parent_annotations must be a list')
77
    return 0
78
79
80
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
81
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
82
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
83
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
84
        raise ValueError('Match length exceeds len of'
85
                         ' parent_annotations %s > %s'
86
                         % (parent_idx + match_len,
87
                            PyList_GET_SIZE(parent_annotations)))
88
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
89
        raise ValueError('Match length exceeds len of'
90
                         ' annotations %s > %s'
91
                         % (lines_idx + match_len,
92
                            PyList_GET_SIZE(annotations)))
93
    return 0
94
95
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
96
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise
4634.117.4 by John Arbash Meinel
Check _annotator_pyx for missing except clauses.
97
    """Return the next entry from this tuple.
98
99
    :param tpl: The tuple we are investigating, *must* be a PyTuple
100
    :param pos: The last item we found. Will be updated to the new position.
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
101
4634.117.4 by John Arbash Meinel
Check _annotator_pyx for missing except clauses.
102
    This cannot raise an exception, as it does no error checking.
103
    """
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
104
    pos[0] = pos[0] + 1
105
    if pos[0] >= PyTuple_GET_SIZE(tpl):
106
        return NULL
107
    return PyTuple_GET_ITEM(tpl, pos[0])
108
109
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
110
cdef object _combine_annotations(ann_one, ann_two, cache):
111
    """Combine the annotations from both sides."""
112
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
113
    cdef Py_ssize_t out_pos
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
114
    cdef PyObject *temp
115
    cdef PyObject *left
116
    cdef PyObject *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
117
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
118
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
119
        cache_key = (ann_one, ann_two)
120
    else:
121
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
122
    temp = PyDict_GetItem(cache, cache_key)
123
    if temp != NULL:
124
        return <object>temp
125
126
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
127
        raise TypeError('annotations must be tuples')
128
    # We know that annotations are tuples, and that both sides are already
129
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
130
    pos_one = -1
131
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
132
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
133
    left = _next_tuple_entry(ann_one, &pos_one)
134
    right = _next_tuple_entry(ann_two, &pos_two)
135
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
136
                          + PyTuple_GET_SIZE(ann_two))
137
    while left != NULL and right != NULL:
4454.3.74 by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'.
138
        # left == right is done by PyObject_RichCompareBool_ptr, however it
139
        # avoids a function call for a very common case. Drops 'time bzr
140
        # annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact.
141
        if (left == right
142
            or PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
143
            # Identical values, step both
144
            Py_INCREF_ptr(left)
145
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
146
            left = _next_tuple_entry(ann_one, &pos_one)
147
            right = _next_tuple_entry(ann_two, &pos_two)
148
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
149
            # left < right or right == NULL
150
            Py_INCREF_ptr(left)
151
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
152
            left = _next_tuple_entry(ann_one, &pos_one)
153
        else: # right < left or left == NULL
154
            Py_INCREF_ptr(right)
155
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
156
            right = _next_tuple_entry(ann_two, &pos_two)
157
        out_pos = out_pos + 1
158
    while left != NULL:
159
        Py_INCREF_ptr(left)
160
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
161
        left = _next_tuple_entry(ann_one, &pos_one)
162
        out_pos = out_pos + 1
163
    while right != NULL:
164
        Py_INCREF_ptr(right)
165
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
166
        right = _next_tuple_entry(ann_two, &pos_two)
167
        out_pos = out_pos + 1
168
    if out_pos != PyTuple_GET_SIZE(new_ann):
169
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
170
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
171
        new_ann = new_ann[0:out_pos]
172
    PyDict_SetItem(cache, cache_key, new_ann)
173
    return new_ann
174
175
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
176
cdef int _apply_parent_annotations(annotations, parent_annotations,
177
                                   matching_blocks) except -1:
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
178
    """Apply the annotations from parent_annotations into annotations.
179
180
    matching_blocks defines the ranges that match.
181
    """
182
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
183
    cdef PyListObject *par_list
184
    cdef PyListObject *ann_list
185
    cdef PyObject **par_temp
186
    cdef PyObject **ann_temp
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
187
188
    _check_annotations_are_lists(annotations, parent_annotations)
189
    par_list = <PyListObject *>parent_annotations
190
    ann_list = <PyListObject *>annotations
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
191
    # For NEWS and breezy/builtins.py, over 99% of the lines are simply copied
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
192
    # across from the parent entry. So this routine is heavily optimized for
193
    # that. Would be interesting if we could use memcpy() but we have to incref
194
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
195
    for parent_idx, lines_idx, match_len in matching_blocks:
196
        _check_match_ranges(parent_annotations, annotations,
197
                            parent_idx, lines_idx, match_len)
198
        par_temp = par_list.ob_item + parent_idx
199
        ann_temp = ann_list.ob_item + lines_idx
200
        for idx from 0 <= idx < match_len:
201
            Py_INCREF_ptr(par_temp[idx])
202
            Py_DECREF_ptr(ann_temp[idx])
203
            ann_temp[idx] = par_temp[idx]
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
204
    return 0
205
206
207
cdef int _merge_annotations(this_annotation, annotations, parent_annotations,
208
                            matching_blocks, ann_cache) except -1:
209
    cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
210
    cdef Py_ssize_t pos
7018.2.2 by Martin
Update _annotator_pyx code for modern Cython
211
    cdef PyObject *ann_temp
212
    cdef PyObject *par_temp
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
213
214
    _check_annotations_are_lists(annotations, parent_annotations)
215
    last_ann = None
216
    last_parent = None
217
    last_res = None
218
    for parent_idx, lines_idx, match_len in matching_blocks:
219
        _check_match_ranges(parent_annotations, annotations,
220
                            parent_idx, lines_idx, match_len)
221
        # For lines which match this parent, we will now resolve whether
222
        # this parent wins over the current annotation
223
        for idx from 0 <= idx < match_len:
224
            ann_idx = lines_idx + idx
225
            ann_temp = PyList_GET_ITEM(annotations, ann_idx)
226
            par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
227
            if (ann_temp == par_temp):
228
                # This is parent, do nothing
229
                # Pointer comparison is fine here. Value comparison would
230
                # be ok, but it will be handled in the final if clause by
231
                # merging the two tuples into the same tuple
232
                # Avoiding the Py_INCREF and function call to
233
                # PyObject_RichCompareBool using pointer comparison drops
234
                # timing from 215ms => 125ms
235
                continue
236
            par_ann = <object>par_temp
237
            ann = <object>ann_temp
238
            if (ann is this_annotation):
239
                # Originally claimed 'this', but it was really in this
240
                # parent
241
                Py_INCREF(par_ann)
242
                PyList_SetItem(annotations, ann_idx, par_ann)
243
                continue
244
            # Resolve the fact that both sides have a different value for
245
            # last modified
246
            if (ann is last_ann and par_ann is last_parent):
247
                Py_INCREF(last_res)
248
                PyList_SetItem(annotations, ann_idx, last_res)
249
            else:
250
                new_ann = _combine_annotations(ann, par_ann, ann_cache)
251
                Py_INCREF(new_ann)
252
                PyList_SetItem(annotations, ann_idx, new_ann)
253
                last_ann = ann
254
                last_parent = par_ann
255
                last_res = new_ann
4454.3.76 by John Arbash Meinel
Always return a value, although Pyrex takes care of that for you if you let it.
256
    return 0
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
257
258
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
259
class Annotator(_annotator_py.Annotator):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
260
    """Class that drives performing annotations."""
261
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
262
    def _update_from_first_parent(self, key, annotations, lines, parent_key):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
263
        """Reannotate this text relative to its first parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
264
        (parent_annotations,
265
         matching_blocks) = self._get_parent_annotations_and_matches(
266
                                key, lines, parent_key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
267
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
268
        _apply_parent_annotations(annotations, parent_annotations,
269
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
270
271
    def _update_from_other_parents(self, key, annotations, lines,
272
                                   this_annotation, parent_key):
273
        """Reannotate this text relative to a second (or more) parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
274
        (parent_annotations,
275
         matching_blocks) = self._get_parent_annotations_and_matches(
276
                                key, lines, parent_key)
277
        _merge_annotations(this_annotation, annotations, parent_annotations,
278
                           matching_blocks, self._ann_tuple_cache)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
279
280
    def annotate_flat(self, key):
281
        """Determine the single-best-revision to source for each line.
282
283
        This is meant as a compatibility thunk to how annotate() used to work.
284
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
285
        cdef Py_ssize_t pos, num_lines
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
286
6656.2.2 by Jelmer Vernooij
Use absolute_import.
287
        from . import annotate
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
288
289
        custom_tiebreaker = annotate._break_annotation_tie
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
290
        annotations, lines = self.annotate(key)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
291
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
292
        out = []
293
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
294
        for pos from 0 <= pos < num_lines:
295
            annotation = annotations[pos]
296
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
297
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
298
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
299
            else:
300
                the_heads = heads(annotation)
301
                if len(the_heads) == 1:
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
302
                    for head in the_heads: break # get the item out of the set
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
303
                else:
304
                    # We need to resolve the ambiguity, for now just pick the
305
                    # sorted smallest
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
306
                    head = self._resolve_annotation_tie(the_heads, line,
307
                                                        custom_tiebreaker)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
308
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
309
        return out