/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Functionality for doing annotations in the 'optimal' way"""
18
6656.2.2 by Jelmer Vernooij
Use absolute_import.
19
from __future__ import absolute_import
20
21
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
22
cdef extern from "python-compat.h":
23
    pass
24
25
cdef extern from "Python.h":
26
    ctypedef int Py_ssize_t
27
    ctypedef struct PyObject:
28
        pass
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
29
    ctypedef struct PyListObject:
30
        PyObject **ob_item
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
31
    int PyList_CheckExact(object)
32
    PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
33
    Py_ssize_t PyList_GET_SIZE(object)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
34
    int PyList_Append(object, object) except -1
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
35
    int PyList_SetItem(object, Py_ssize_t o, object) except -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
36
    int PyList_Sort(object) except -1
37
38
    int PyTuple_CheckExact(object)
39
    object PyTuple_New(Py_ssize_t len)
40
    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
41
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
42
                                                  PyObject *)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
43
    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
44
    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
45
    Py_ssize_t PyTuple_GET_SIZE(object)
46
47
    PyObject *PyDict_GetItem(object d, object k)
48
    int PyDict_SetItem(object d, object k, object v) except -1
49
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
50
    void Py_INCREF(object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
51
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
52
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
53
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
54
    int Py_EQ
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
55
    int Py_LT
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
56
    int PyObject_RichCompareBool(object, object, int opid) except -1
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
57
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
58
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
59
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
60
6656.2.2 by Jelmer Vernooij
Use absolute_import.
61
from . import _annotator_py
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
62
63
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
64
cdef int _check_annotations_are_lists(annotations,
65
                                      parent_annotations) except -1:
66
    if not PyList_CheckExact(annotations):
67
        raise TypeError('annotations must be a list')
68
    if not PyList_CheckExact(parent_annotations):
69
        raise TypeError('parent_annotations must be a list')
70
    return 0
71
72
73
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
74
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
75
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
76
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
77
        raise ValueError('Match length exceeds len of'
78
                         ' parent_annotations %s > %s'
79
                         % (parent_idx + match_len,
80
                            PyList_GET_SIZE(parent_annotations)))
81
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
82
        raise ValueError('Match length exceeds len of'
83
                         ' annotations %s > %s'
84
                         % (lines_idx + match_len,
85
                            PyList_GET_SIZE(annotations)))
86
    return 0
87
88
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
89
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise
4634.117.4 by John Arbash Meinel
Check _annotator_pyx for missing except clauses.
90
    """Return the next entry from this tuple.
91
92
    :param tpl: The tuple we are investigating, *must* be a PyTuple
93
    :param pos: The last item we found. Will be updated to the new position.
94
    
95
    This cannot raise an exception, as it does no error checking.
96
    """
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
97
    pos[0] = pos[0] + 1
98
    if pos[0] >= PyTuple_GET_SIZE(tpl):
99
        return NULL
100
    return PyTuple_GET_ITEM(tpl, pos[0])
101
102
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
103
cdef object _combine_annotations(ann_one, ann_two, cache):
104
    """Combine the annotations from both sides."""
105
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
106
    cdef Py_ssize_t out_pos
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
107
    cdef PyObject *temp, *left, *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
108
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
109
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
110
        cache_key = (ann_one, ann_two)
111
    else:
112
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
113
    temp = PyDict_GetItem(cache, cache_key)
114
    if temp != NULL:
115
        return <object>temp
116
117
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
118
        raise TypeError('annotations must be tuples')
119
    # We know that annotations are tuples, and that both sides are already
120
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
121
    pos_one = -1
122
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
123
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
124
    left = _next_tuple_entry(ann_one, &pos_one)
125
    right = _next_tuple_entry(ann_two, &pos_two)
126
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
127
                          + PyTuple_GET_SIZE(ann_two))
128
    while left != NULL and right != NULL:
4454.3.74 by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'.
129
        # left == right is done by PyObject_RichCompareBool_ptr, however it
130
        # avoids a function call for a very common case. Drops 'time bzr
131
        # annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact.
132
        if (left == right
133
            or PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
134
            # Identical values, step both
135
            Py_INCREF_ptr(left)
136
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
137
            left = _next_tuple_entry(ann_one, &pos_one)
138
            right = _next_tuple_entry(ann_two, &pos_two)
139
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
140
            # left < right or right == NULL
141
            Py_INCREF_ptr(left)
142
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
143
            left = _next_tuple_entry(ann_one, &pos_one)
144
        else: # right < left or left == NULL
145
            Py_INCREF_ptr(right)
146
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
147
            right = _next_tuple_entry(ann_two, &pos_two)
148
        out_pos = out_pos + 1
149
    while left != NULL:
150
        Py_INCREF_ptr(left)
151
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
152
        left = _next_tuple_entry(ann_one, &pos_one)
153
        out_pos = out_pos + 1
154
    while right != NULL:
155
        Py_INCREF_ptr(right)
156
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
157
        right = _next_tuple_entry(ann_two, &pos_two)
158
        out_pos = out_pos + 1
159
    if out_pos != PyTuple_GET_SIZE(new_ann):
160
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
161
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
162
        new_ann = new_ann[0:out_pos]
163
    PyDict_SetItem(cache, cache_key, new_ann)
164
    return new_ann
165
166
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
167
cdef int _apply_parent_annotations(annotations, parent_annotations,
168
                                   matching_blocks) except -1:
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
169
    """Apply the annotations from parent_annotations into annotations.
170
171
    matching_blocks defines the ranges that match.
172
    """
173
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
174
    cdef PyListObject *par_list, *ann_list
175
    cdef PyObject **par_temp, **ann_temp
176
177
    _check_annotations_are_lists(annotations, parent_annotations)
178
    par_list = <PyListObject *>parent_annotations
179
    ann_list = <PyListObject *>annotations
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
180
    # For NEWS and breezy/builtins.py, over 99% of the lines are simply copied
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
181
    # across from the parent entry. So this routine is heavily optimized for
182
    # that. Would be interesting if we could use memcpy() but we have to incref
183
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
184
    for parent_idx, lines_idx, match_len in matching_blocks:
185
        _check_match_ranges(parent_annotations, annotations,
186
                            parent_idx, lines_idx, match_len)
187
        par_temp = par_list.ob_item + parent_idx
188
        ann_temp = ann_list.ob_item + lines_idx
189
        for idx from 0 <= idx < match_len:
190
            Py_INCREF_ptr(par_temp[idx])
191
            Py_DECREF_ptr(ann_temp[idx])
192
            ann_temp[idx] = par_temp[idx]
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
193
    return 0
194
195
196
cdef int _merge_annotations(this_annotation, annotations, parent_annotations,
197
                            matching_blocks, ann_cache) except -1:
198
    cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
199
    cdef Py_ssize_t pos
200
    cdef PyObject *ann_temp, *par_temp
201
202
    _check_annotations_are_lists(annotations, parent_annotations)
203
    last_ann = None
204
    last_parent = None
205
    last_res = None
206
    for parent_idx, lines_idx, match_len in matching_blocks:
207
        _check_match_ranges(parent_annotations, annotations,
208
                            parent_idx, lines_idx, match_len)
209
        # For lines which match this parent, we will now resolve whether
210
        # this parent wins over the current annotation
211
        for idx from 0 <= idx < match_len:
212
            ann_idx = lines_idx + idx
213
            ann_temp = PyList_GET_ITEM(annotations, ann_idx)
214
            par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
215
            if (ann_temp == par_temp):
216
                # This is parent, do nothing
217
                # Pointer comparison is fine here. Value comparison would
218
                # be ok, but it will be handled in the final if clause by
219
                # merging the two tuples into the same tuple
220
                # Avoiding the Py_INCREF and function call to
221
                # PyObject_RichCompareBool using pointer comparison drops
222
                # timing from 215ms => 125ms
223
                continue
224
            par_ann = <object>par_temp
225
            ann = <object>ann_temp
226
            if (ann is this_annotation):
227
                # Originally claimed 'this', but it was really in this
228
                # parent
229
                Py_INCREF(par_ann)
230
                PyList_SetItem(annotations, ann_idx, par_ann)
231
                continue
232
            # Resolve the fact that both sides have a different value for
233
            # last modified
234
            if (ann is last_ann and par_ann is last_parent):
235
                Py_INCREF(last_res)
236
                PyList_SetItem(annotations, ann_idx, last_res)
237
            else:
238
                new_ann = _combine_annotations(ann, par_ann, ann_cache)
239
                Py_INCREF(new_ann)
240
                PyList_SetItem(annotations, ann_idx, new_ann)
241
                last_ann = ann
242
                last_parent = par_ann
243
                last_res = new_ann
4454.3.76 by John Arbash Meinel
Always return a value, although Pyrex takes care of that for you if you let it.
244
    return 0
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
245
246
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
247
class Annotator(_annotator_py.Annotator):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
248
    """Class that drives performing annotations."""
249
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
250
    def _update_from_first_parent(self, key, annotations, lines, parent_key):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
251
        """Reannotate this text relative to its first parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
252
        (parent_annotations,
253
         matching_blocks) = self._get_parent_annotations_and_matches(
254
                                key, lines, parent_key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
255
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
256
        _apply_parent_annotations(annotations, parent_annotations,
257
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
258
259
    def _update_from_other_parents(self, key, annotations, lines,
260
                                   this_annotation, parent_key):
261
        """Reannotate this text relative to a second (or more) parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
262
        (parent_annotations,
263
         matching_blocks) = self._get_parent_annotations_and_matches(
264
                                key, lines, parent_key)
265
        _merge_annotations(this_annotation, annotations, parent_annotations,
266
                           matching_blocks, self._ann_tuple_cache)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
267
268
    def annotate_flat(self, key):
269
        """Determine the single-best-revision to source for each line.
270
271
        This is meant as a compatibility thunk to how annotate() used to work.
272
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
273
        cdef Py_ssize_t pos, num_lines
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
274
6656.2.2 by Jelmer Vernooij
Use absolute_import.
275
        from . import annotate
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
276
277
        custom_tiebreaker = annotate._break_annotation_tie
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
278
        annotations, lines = self.annotate(key)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
279
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
280
        out = []
281
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
282
        for pos from 0 <= pos < num_lines:
283
            annotation = annotations[pos]
284
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
285
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
286
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
287
            else:
288
                the_heads = heads(annotation)
289
                if len(the_heads) == 1:
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
290
                    for head in the_heads: break # get the item out of the set
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
291
                else:
292
                    # We need to resolve the ambiguity, for now just pick the
293
                    # sorted smallest
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
294
                    head = self._resolve_annotation_tie(the_heads, line,
295
                                                        custom_tiebreaker)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
296
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
297
        return out