/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7509.1.1 by Jelmer Vernooij
Set cython language level to Python 3.
16
#
17
# cython: language_level=3
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
18
19
"""Implementation of Graph algorithms when we have already loaded everything.
20
"""
21
22
cdef extern from "python-compat.h":
23
    pass
24
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
25
from cpython.bytes cimport (
26
    PyBytes_CheckExact,
27
    )
28
from cpython.dict cimport (
29
    PyDict_CheckExact,
30
    PyDict_DelItem,
31
    PyDict_GetItem,
32
    PyDict_Next,
33
    PyDict_SetItem,
34
    PyDict_Size,
35
    )
36
from cpython.list cimport (
37
    PyList_Append,
38
    PyList_CheckExact,
39
    PyList_GET_SIZE,
40
    PyList_GET_ITEM,
41
    PyList_SetItem,
42
    )
43
from cpython.object cimport (
44
    Py_LT,
45
    PyObject,
46
    PyObject_RichCompareBool,
47
    )
48
from cpython.ref cimport (
49
    Py_INCREF,
50
    )
51
from cpython.tuple cimport (
52
    PyTuple_CheckExact,
53
    PyTuple_GET_SIZE,
54
    PyTuple_GET_ITEM,
55
    PyTuple_New,
56
    PyTuple_SET_ITEM,
57
    )
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
58
6800.1.3 by Jelmer Vernooij
Avoid modifying possibly lazily imported module.
59
import collections
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
60
import gc
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
61
6656.2.2 by Jelmer Vernooij
Use absolute_import.
62
from . import errors, revision
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
63
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
64
cdef object NULL_REVISION
65
NULL_REVISION = revision.NULL_REVISION
66
67
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
68
cdef class _KnownGraphNode:
69
    """Represents a single object in the known graph."""
70
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
71
    cdef object key
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
72
    cdef object parents
73
    cdef object children
4371.4.13 by Vincent Ladeuil
Fixed as per John's review.
74
    cdef public long gdfo
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
75
    cdef int seen
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
76
    cdef object extra
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
77
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
78
    def __init__(self, key):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
79
        self.key = key
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
80
        self.parents = None
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
81
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
82
        self.children = []
83
        # Greatest distance from origin
84
        self.gdfo = -1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
85
        self.seen = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
86
        self.extra = None
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
87
88
    property child_keys:
89
        def __get__(self):
90
            cdef _KnownGraphNode child
91
92
            keys = []
93
            for child in self.children:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
94
                PyList_Append(keys, child.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
95
            return keys
96
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
97
    property parent_keys:
98
        def __get__(self):
99
            if self.parents is None:
100
                return None
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
101
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
102
            cdef _KnownGraphNode parent
103
104
            keys = []
105
            for parent in self.parents:
106
                PyList_Append(keys, parent.key)
107
            return keys
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
108
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
109
    cdef clear_references(self):
110
        self.parents = None
111
        self.children = None
112
113
    def __repr__(self):
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
114
        cdef _KnownGraphNode node
115
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
116
        parent_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
117
        if self.parents is not None:
118
            for node in self.parents:
119
                parent_keys.append(node.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
120
        child_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
121
        if self.children is not None:
122
            for node in self.children:
123
                child_keys.append(node.key)
4371.4.10 by Vincent Ladeuil
Cleanup.
124
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
125
            self.__class__.__name__, self.key, self.gdfo,
4371.4.12 by Vincent Ladeuil
Fix typos.
126
            parent_keys, child_keys)
4371.4.10 by Vincent Ladeuil
Cleanup.
127
4371.3.37 by John Arbash Meinel
cleanup passes.
128
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
129
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
130
    cdef PyObject *temp_node
131
132
    temp_node = PyList_GET_ITEM(lst, pos)
133
    return <_KnownGraphNode>temp_node
134
135
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
136
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
137
    cdef PyObject *temp_node
138
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
139
    temp_node = PyTuple_GET_ITEM(tpl, pos)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
140
    return <_KnownGraphNode>temp_node
141
142
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
143
def get_key(node):
144
    cdef _KnownGraphNode real_node
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
145
    real_node = node
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
146
    return real_node.key
147
148
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
149
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
150
    """Sort a list of _KnownGraphNode objects.
151
152
    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
153
    just return the input list if everything is already sorted.
154
    """
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
155
    cdef _KnownGraphNode node1, node2
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
156
    cdef int do_swap, is_tuple
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
157
    cdef Py_ssize_t length
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
158
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
159
    is_tuple = PyTuple_CheckExact(lst_or_tpl)
160
    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
161
        raise TypeError('lst_or_tpl must be a list or tuple.')
162
    length = len(lst_or_tpl)
163
    if length == 0 or length == 1:
164
        return lst_or_tpl
165
    if length == 2:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
166
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
167
            node1 = _get_tuple_node(lst_or_tpl, 0)
168
            node2 = _get_tuple_node(lst_or_tpl, 1)
169
        else:
170
            node1 = _get_list_node(lst_or_tpl, 0)
171
            node2 = _get_list_node(lst_or_tpl, 1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
172
        if reverse:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
173
            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
174
        else:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
175
            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
176
        if not do_swap:
177
            return lst_or_tpl
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
178
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
179
            return (node2, node1)
180
        else:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
181
            # Swap 'in-place', since lists are mutable
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
182
            Py_INCREF(node1)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
183
            PyList_SetItem(lst_or_tpl, 1, node1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
184
            Py_INCREF(node2)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
185
            PyList_SetItem(lst_or_tpl, 0, node2)
186
            return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
187
    # For all other sizes, we just use 'sorted()'
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
188
    if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
189
        # Note that sorted() is just list(iterable).sort()
190
        lst_or_tpl = list(lst_or_tpl)
191
    lst_or_tpl.sort(key=get_key, reverse=reverse)
192
    return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
193
194
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
195
cdef class _MergeSorter
196
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
197
cdef class KnownGraph:
198
    """This is a class which assumes we already know the full graph."""
199
200
    cdef public object _nodes
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
201
    cdef public object _known_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
202
    cdef public int do_cache
203
204
    def __init__(self, parent_map, do_cache=True):
205
        """Create a new KnownGraph instance.
206
207
        :param parent_map: A dictionary mapping key => parent_keys
208
        """
4371.4.29 by John Arbash Meinel
Pre-allocating self._nodes turned out to be slower.
209
        # tests at pre-allocating the node dict actually slowed things down
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
210
        self._nodes = {}
211
        # Maps {sorted(revision_id, revision_id): heads}
212
        self._known_heads = {}
213
        self.do_cache = int(do_cache)
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
214
        # TODO: consider disabling gc since we are allocating a lot of nodes
215
        #       that won't be collectable anyway. real world testing has not
216
        #       shown a specific impact, yet.
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
217
        self._initialize_nodes(parent_map)
218
        self._find_gdfo()
219
220
    def __dealloc__(self):
221
        cdef _KnownGraphNode child
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
222
        cdef Py_ssize_t pos
223
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
224
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
225
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
226
            child = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
227
            child.clear_references()
228
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
229
    cdef _KnownGraphNode _get_or_create_node(self, key):
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
230
        cdef PyObject *temp_node
231
        cdef _KnownGraphNode node
232
233
        temp_node = PyDict_GetItem(self._nodes, key)
234
        if temp_node == NULL:
235
            node = _KnownGraphNode(key)
236
            PyDict_SetItem(self._nodes, key, node)
237
        else:
238
            node = <_KnownGraphNode>temp_node
239
        return node
240
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
241
    cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
242
        cdef Py_ssize_t num_parent_keys, pos
243
        cdef _KnownGraphNode parent_node
244
245
        num_parent_keys = len(parent_keys)
246
        # We know how many parents, so we pre allocate the tuple
247
        parent_nodes = PyTuple_New(num_parent_keys)
248
        for pos from 0 <= pos < num_parent_keys:
249
            # Note: it costs us 10ms out of 40ms to lookup all of these
250
            #       parents, it doesn't seem to be an allocation overhead,
251
            #       but rather a lookup overhead. There doesn't seem to be
252
            #       a way around it, and that is one reason why
253
            #       KnownGraphNode maintains a direct pointer to the parent
254
            #       node.
255
            # We use [] because parent_keys may be a tuple or list
256
            parent_node = self._get_or_create_node(parent_keys[pos])
257
            # PyTuple_SET_ITEM will steal a reference, so INCREF first
258
            Py_INCREF(parent_node)
259
            PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
260
            PyList_Append(parent_node.children, node)
261
        node.parents = parent_nodes
262
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
263
    def _initialize_nodes(self, parent_map):
264
        """Populate self._nodes.
265
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
266
        After this has finished:
267
        - self._nodes will have an entry for every entry in parent_map.
268
        - ghosts will have a parent_keys = None,
4371.4.12 by Vincent Ladeuil
Fix typos.
269
        - all nodes found will also have child_keys populated with all known
270
          child keys,
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
271
        """
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
272
        cdef PyObject *temp_key
273
        cdef PyObject *temp_parent_keys
274
        cdef PyObject *temp_node
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
275
        cdef Py_ssize_t pos
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
276
        cdef _KnownGraphNode node
277
        cdef _KnownGraphNode parent_node
278
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
279
        if not PyDict_CheckExact(parent_map):
280
            raise TypeError('parent_map should be a dict of {key:parent_keys}')
281
        # for key, parent_keys in parent_map.iteritems():
282
        pos = 0
283
        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
284
            key = <object>temp_key
285
            parent_keys = <object>temp_parent_keys
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
286
            node = self._get_or_create_node(key)
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
287
            self._populate_parents(node, parent_keys)
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
288
289
    def _find_tails(self):
290
        cdef PyObject *temp_node
291
        cdef _KnownGraphNode node
292
        cdef Py_ssize_t pos
293
294
        tails = []
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
295
        pos = 0
296
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
297
            node = <_KnownGraphNode>temp_node
298
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
299
                node.gdfo = 1
300
                PyList_Append(tails, node)
301
        return tails
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
302
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
303
    def _find_tips(self):
304
        cdef PyObject *temp_node
305
        cdef _KnownGraphNode node
306
        cdef Py_ssize_t pos
307
308
        tips = []
309
        pos = 0
310
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
311
            node = <_KnownGraphNode>temp_node
312
            if PyList_GET_SIZE(node.children) == 0:
313
                PyList_Append(tips, node)
314
        return tips
315
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
316
    def _find_gdfo(self):
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
317
        cdef _KnownGraphNode node
318
        cdef _KnownGraphNode child
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
319
        cdef PyObject *temp
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
320
        cdef Py_ssize_t pos
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
321
        cdef int replace
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
322
        cdef Py_ssize_t last_item
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
323
        cdef long next_gdfo
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
324
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
325
        pending = self._find_tails()
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
326
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
327
        last_item = PyList_GET_SIZE(pending) - 1
328
        while last_item >= 0:
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
329
            # Avoid pop followed by push, instead, peek, and replace
4371.4.16 by John Arbash Meinel
Document some perf issues with using known_parent_gdfos, but leave it as a dict.
330
            # timing shows this is 930ms => 770ms for OOo
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
331
            node = _get_list_node(pending, last_item)
332
            last_item = last_item - 1
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
333
            next_gdfo = node.gdfo + 1
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
334
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
335
                child = _get_list_node(node.children, pos)
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
336
                if next_gdfo > child.gdfo:
337
                    child.gdfo = next_gdfo
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
338
                child.seen = child.seen + 1
339
                if child.seen == PyTuple_GET_SIZE(child.parents):
4371.4.17 by John Arbash Meinel
tried a different tack, to use a new list and append.
340
                    # This child is populated, queue it to be walked
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
341
                    last_item = last_item + 1
342
                    if last_item < PyList_GET_SIZE(pending):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
343
                        Py_INCREF(child) # SetItem steals a ref
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
344
                        PyList_SetItem(pending, last_item, child)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
345
                    else:
346
                        PyList_Append(pending, child)
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
347
                    # We have queued this node, we don't need to track it
348
                    # anymore
349
                    child.seen = 0
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
350
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
351
    def add_node(self, key, parent_keys):
352
        """Add a new node to the graph.
353
354
        If this fills in a ghost, then the gdfos of all children will be
355
        updated accordingly.
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
356
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
357
        :param key: The node being added. If this is a duplicate, this is a
358
            no-op.
359
        :param parent_keys: The parents of the given node.
360
        :return: None (should we return if this was a ghost, etc?)
361
        """
362
        cdef PyObject *maybe_node
363
        cdef _KnownGraphNode node, parent_node, child_node
364
        cdef long parent_gdfo, next_gdfo
365
366
        maybe_node = PyDict_GetItem(self._nodes, key)
367
        if maybe_node != NULL:
368
            node = <_KnownGraphNode>maybe_node
369
            if node.parents is None:
370
                # We are filling in a ghost
371
                self._populate_parents(node, parent_keys)
372
                # We can't trust cached heads anymore
373
                self._known_heads.clear()
374
            else: # Ensure that the parent_key list matches
375
                existing_parent_keys = []
376
                for parent_node in node.parents:
377
                    existing_parent_keys.append(parent_node.key)
378
                # Make sure we use a list for the comparison, in case it was a
379
                # tuple, etc
380
                parent_keys = list(parent_keys)
381
                if existing_parent_keys == parent_keys:
382
                    # Exact match, nothing more to do
383
                    return
384
                else:
385
                    raise ValueError('Parent key mismatch, existing node %s'
386
                        ' has parents of %s not %s'
387
                        % (key, existing_parent_keys, parent_keys))
388
        else:
389
            node = _KnownGraphNode(key)
390
            PyDict_SetItem(self._nodes, key, node)
391
            self._populate_parents(node, parent_keys)
392
        parent_gdfo = 0
393
        for parent_node in node.parents:
394
            if parent_node.gdfo == -1:
395
                # This is a newly introduced ghost, so it gets gdfo of 1
396
                parent_node.gdfo = 1
397
            if parent_gdfo < parent_node.gdfo:
398
                parent_gdfo = parent_node.gdfo
399
        node.gdfo = parent_gdfo + 1
400
        # Now fill the gdfo to all children
401
        # Note that this loop is slightly inefficient, in that we may visit the
402
        # same child (and its decendents) more than once, however, it is
403
        # 'efficient' in that we only walk to nodes that would be updated,
404
        # rather than all nodes
405
        # We use a deque rather than a simple list stack, to go for BFD rather
406
        # than DFD. So that if a longer path is possible, we walk it before we
407
        # get to the final child
6800.1.3 by Jelmer Vernooij
Avoid modifying possibly lazily imported module.
408
        pending = collections.deque([node])
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
409
        pending_popleft = pending.popleft
410
        pending_append = pending.append
411
        while pending:
412
            node = pending_popleft()
413
            next_gdfo = node.gdfo + 1
414
            for child_node in node.children:
415
                if child_node.gdfo < next_gdfo:
416
                    # This child is being updated, we need to check its
417
                    # children
418
                    child_node.gdfo = next_gdfo
419
                    pending_append(child_node)
420
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
421
    def heads(self, keys):
422
        """Return the heads from amongst keys.
423
424
        This is done by searching the ancestries of each key.  Any key that is
425
        reachable from another key is not returned; all the others are.
426
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
427
        This operation scales with the relative depth between any two keys. It
428
        uses gdfo to avoid walking all ancestry.
429
430
        :param keys: An iterable of keys.
431
        :return: A set of the heads. Note that as a set there is no ordering
432
            information. Callers will need to filter their input to create
433
            order if they need it.
434
        """
435
        cdef PyObject *maybe_node
436
        cdef PyObject *maybe_heads
437
        cdef PyObject *temp_node
438
        cdef _KnownGraphNode node
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
439
        cdef Py_ssize_t pos, last_item
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
440
        cdef long min_gdfo
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
441
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
442
        heads_key = frozenset(keys)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
443
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
444
        if maybe_heads != NULL:
445
            return <object>maybe_heads
446
        # Not cached, compute it ourselves
447
        candidate_nodes = {}
448
        for key in keys:
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
449
            maybe_node = PyDict_GetItem(self._nodes, key)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
450
            if maybe_node == NULL:
451
                raise KeyError('key %s not in nodes' % (key,))
452
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
453
        maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
454
        if maybe_node != NULL:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
455
            # NULL_REVISION is only a head if it is the only entry
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
456
            candidate_nodes.pop(NULL_REVISION)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
457
            if not candidate_nodes:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
458
                return frozenset([NULL_REVISION])
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
459
            # The keys changed, so recalculate heads_key
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
460
            heads_key = frozenset(candidate_nodes)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
461
        if PyDict_Size(candidate_nodes) < 2:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
462
            return heads_key
463
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
464
        cleanup = []
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
465
        pending = []
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
466
        # we know a gdfo cannot be longer than a linear chain of all nodes
467
        min_gdfo = PyDict_Size(self._nodes) + 1
468
        # Build up nodes that need to be walked, note that starting nodes are
469
        # not added to seen()
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
470
        pos = 0
471
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
472
            node = <_KnownGraphNode>temp_node
473
            if node.parents is not None:
474
                pending.extend(node.parents)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
475
            if node.gdfo < min_gdfo:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
476
                min_gdfo = node.gdfo
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
477
478
        # Now do all the real work
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
479
        last_item = PyList_GET_SIZE(pending) - 1
480
        while last_item >= 0:
481
            node = _get_list_node(pending, last_item)
482
            last_item = last_item - 1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
483
            if node.seen:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
484
                # node already appears in some ancestry
485
                continue
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
486
            PyList_Append(cleanup, node)
487
            node.seen = 1
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
488
            if node.gdfo <= min_gdfo:
489
                continue
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
490
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
491
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
492
                    parent_node = _get_tuple_node(node.parents, pos)
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
493
                    last_item = last_item + 1
494
                    if last_item < PyList_GET_SIZE(pending):
495
                        Py_INCREF(parent_node) # SetItem steals a ref
496
                        PyList_SetItem(pending, last_item, parent_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
497
                    else:
498
                        PyList_Append(pending, parent_node)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
499
        heads = []
500
        pos = 0
501
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
502
            node = <_KnownGraphNode>temp_node
503
            if not node.seen:
504
                PyList_Append(heads, node.key)
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
505
        heads = frozenset(heads)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
506
        for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
507
            node = _get_list_node(cleanup, pos)
508
            node.seen = 0
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
509
        if self.do_cache:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
510
            PyDict_SetItem(self._known_heads, heads_key, heads)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
511
        return heads
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
512
513
    def topo_sort(self):
514
        """Return the nodes in topological order.
515
516
        All parents must occur before all children.
517
        """
518
        # This is, for the most part, the same iteration order that we used for
519
        # _find_gdfo, consider finding a way to remove the duplication
520
        # In general, we find the 'tails' (nodes with no parents), and then
521
        # walk to the children. For children that have all of their parents
522
        # yielded, we queue up the child to be yielded as well.
523
        cdef _KnownGraphNode node
524
        cdef _KnownGraphNode child
525
        cdef PyObject *temp
526
        cdef Py_ssize_t pos
527
        cdef int replace
528
        cdef Py_ssize_t last_item
529
530
        pending = self._find_tails()
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
531
        if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
532
            raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
533
534
        topo_order = []
535
536
        last_item = PyList_GET_SIZE(pending) - 1
537
        while last_item >= 0:
538
            # Avoid pop followed by push, instead, peek, and replace
539
            # timing shows this is 930ms => 770ms for OOo
540
            node = _get_list_node(pending, last_item)
541
            last_item = last_item - 1
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
542
            if node.parents is not None:
543
                # We don't include ghost parents
544
                PyList_Append(topo_order, node.key)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
545
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
546
                child = _get_list_node(node.children, pos)
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
547
                if child.gdfo == -1:
548
                    # We know we have a graph cycle because a node has a parent
549
                    # which we couldn't find
550
                    raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
551
                child.seen = child.seen + 1
552
                if child.seen == PyTuple_GET_SIZE(child.parents):
553
                    # All parents of this child have been yielded, queue this
554
                    # one to be yielded as well
555
                    last_item = last_item + 1
556
                    if last_item < PyList_GET_SIZE(pending):
557
                        Py_INCREF(child) # SetItem steals a ref
558
                        PyList_SetItem(pending, last_item, child)
559
                    else:
560
                        PyList_Append(pending, child)
561
                    # We have queued this node, we don't need to track it
562
                    # anymore
563
                    child.seen = 0
564
        # We started from the parents, so we don't need to do anymore work
565
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
566
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
567
    def gc_sort(self):
568
        """Return a reverse topological ordering which is 'stable'.
569
570
        There are a few constraints:
571
          1) Reverse topological (all children before all parents)
572
          2) Grouped by prefix
573
          3) 'stable' sorting, so that we get the same result, independent of
574
             machine, or extra data.
575
        To do this, we use the same basic algorithm as topo_sort, but when we
576
        aren't sure what node to access next, we sort them lexicographically.
577
        """
578
        cdef PyObject *temp
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
579
        cdef Py_ssize_t pos, last_item
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
580
        cdef _KnownGraphNode node, node2, parent_node
581
582
        tips = self._find_tips()
583
        # Split the tips based on prefix
584
        prefix_tips = {}
585
        for pos from 0 <= pos < PyList_GET_SIZE(tips):
586
            node = _get_list_node(tips, pos)
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
587
            if PyBytes_CheckExact(node.key) or len(node.key) == 1:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
588
                prefix = ''
589
            else:
590
                prefix = node.key[0]
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
591
            temp = PyDict_GetItem(prefix_tips, prefix)
592
            if temp == NULL:
593
                prefix_tips[prefix] = [node]
594
            else:
595
                tip_nodes = <object>temp
596
                PyList_Append(tip_nodes, node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
597
598
        result = []
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
599
        for prefix in sorted(prefix_tips):
600
            temp = PyDict_GetItem(prefix_tips, prefix)
601
            assert temp != NULL
602
            tip_nodes = <object>temp
603
            pending = _sort_list_nodes(tip_nodes, 1)
604
            last_item = PyList_GET_SIZE(pending) - 1
605
            while last_item >= 0:
606
                node = _get_list_node(pending, last_item)
607
                last_item = last_item - 1
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
608
                if node.parents is None:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
609
                    # Ghost
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
610
                    continue
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
611
                PyList_Append(result, node.key)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
612
                # Sorting the parent keys isn't strictly necessary for stable
613
                # sorting of a given graph. But it does help minimize the
614
                # differences between graphs
615
                # For bzr.dev ancestry:
616
                #   4.73ms  no sort
617
                #   7.73ms  RichCompareBool sort
4641.5.10 by John Arbash Meinel
Go back to sorted parents mode.
618
                parents = _sort_list_nodes(node.parents, 1)
619
                for pos from 0 <= pos < len(parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
620
                    if PyTuple_CheckExact(parents):
621
                        parent_node = _get_tuple_node(parents, pos)
622
                    else:
623
                        parent_node = _get_list_node(parents, pos)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
624
                    # TODO: GraphCycle detection
625
                    parent_node.seen = parent_node.seen + 1
626
                    if (parent_node.seen
627
                        == PyList_GET_SIZE(parent_node.children)):
628
                        # All children have been processed, queue up this
629
                        # parent
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
630
                        last_item = last_item + 1
631
                        if last_item < PyList_GET_SIZE(pending):
632
                            Py_INCREF(parent_node) # SetItem steals a ref
633
                            PyList_SetItem(pending, last_item, parent_node)
634
                        else:
635
                            PyList_Append(pending, parent_node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
636
                        parent_node.seen = 0
637
        return result
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
638
639
    def merge_sort(self, tip_key):
640
        """Compute the merge sorted graph output."""
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
641
        cdef _MergeSorter sorter
642
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
643
        # TODO: consider disabling gc since we are allocating a lot of nodes
644
        #       that won't be collectable anyway. real world testing has not
645
        #       shown a specific impact, yet.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
646
        sorter = _MergeSorter(self, tip_key)
647
        return sorter.topo_order()
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
648
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
649
    def get_parent_keys(self, key):
650
        """Get the parents for a key
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
651
7254.3.2 by Jelmer Vernooij
More typo fixes.
652
        Returns a list containing the parents keys. If the key is a ghost,
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
653
        None is returned. A KeyError will be raised if the key is not in
654
        the graph.
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
655
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
656
        :param keys: Key to check (eg revision_id)
657
        :return: A list of parents
658
        """
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
659
        return self._nodes[key].parent_keys
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
660
661
    def get_child_keys(self, key):
662
        """Get the children for a key
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
663
7254.3.2 by Jelmer Vernooij
More typo fixes.
664
        Returns a list containing the children keys. A KeyError will be raised
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
665
        if the key is not in the graph.
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
666
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
667
        :param keys: Key to check (eg revision_id)
668
        :return: A list of children
669
        """
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
670
        return self._nodes[key].child_keys
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
671
672
673
cdef class _MergeSortNode:
674
    """Tracks information about a node during the merge_sort operation."""
675
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
676
    # Public api
677
    cdef public object key
678
    cdef public long merge_depth
679
    cdef public object end_of_merge # True/False Is this the end of the current merge
680
681
    # Private api, used while computing the information
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
682
    cdef _KnownGraphNode left_parent
683
    cdef _KnownGraphNode left_pending_parent
684
    cdef object pending_parents # list of _KnownGraphNode for non-left parents
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
685
    cdef long _revno_first
686
    cdef long _revno_second
687
    cdef long _revno_last
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
688
    # TODO: turn these into flag/bit fields rather than individual members
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
689
    cdef int is_first_child # Is this the first child?
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
690
    cdef int seen_by_child # A child node has seen this parent
691
    cdef int completed # Fully Processed
692
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
693
    def __init__(self, key):
694
        self.key = key
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
695
        self.merge_depth = -1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
696
        self.left_parent = None
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
697
        self.left_pending_parent = None
698
        self.pending_parents = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
699
        self._revno_first = -1
700
        self._revno_second = -1
701
        self._revno_last = -1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
702
        self.is_first_child = 0
703
        self.seen_by_child = 0
704
        self.completed = 0
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
705
706
    def __repr__(self):
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
707
        return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
708
            self.__class__.__name__, self.key,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
709
            self.merge_depth,
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
710
            self._revno_first, self._revno_second, self._revno_last,
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
711
            self.is_first_child, self.seen_by_child)
712
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
713
    cdef int has_pending_parents(self): # cannot_raise
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
714
        if self.left_pending_parent is not None or self.pending_parents:
715
            return 1
716
        return 0
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
717
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
718
    cdef object _revno(self):
719
        if self._revno_first == -1:
720
            if self._revno_second != -1:
721
                raise RuntimeError('Something wrong with: %s' % (self,))
722
            return (self._revno_last,)
723
        else:
724
            return (self._revno_first, self._revno_second, self._revno_last)
725
726
    property revno:
727
        def __get__(self):
728
            return self._revno()
729
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
730
731
cdef class _MergeSorter:
732
    """This class does the work of computing the merge_sort ordering.
733
734
    We have some small advantages, in that we get all the extra information
735
    that KnownGraph knows, like knowing the child lists, etc.
736
    """
737
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
738
    # Current performance numbers for merge_sort(bzr_dev_parent_map):
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
739
    #  302ms tsort.merge_sort()
740
    #   91ms graph.KnownGraph().merge_sort()
4593.5.28 by John Arbash Meinel
Cleanup pass
741
    #   40ms kg.merge_sort()
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
742
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
743
    cdef KnownGraph graph
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
744
    cdef object _depth_first_stack  # list
745
    cdef Py_ssize_t _last_stack_item # offset to last item on stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
746
    # cdef object _ms_nodes # dict of key => _MergeSortNode
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
747
    cdef object _revno_to_branch_count # {revno => num child branches}
748
    cdef object _scheduled_nodes # List of nodes ready to be yielded
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
749
750
    def __init__(self, known_graph, tip_key):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
751
        cdef _KnownGraphNode node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
752
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
753
        self.graph = known_graph
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
754
        # self._ms_nodes = {}
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
755
        self._revno_to_branch_count = {}
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
756
        self._depth_first_stack = []
757
        self._last_stack_item = -1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
758
        self._scheduled_nodes = []
4593.5.42 by John Arbash Meinel
Turns out that some code assumed passing NULL_REVISION to merge_sort
759
        if (tip_key is not None and tip_key != NULL_REVISION
760
            and tip_key != (NULL_REVISION,)):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
761
            node = self.graph._nodes[tip_key]
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
762
            self._push_node(node, 0)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
763
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
764
    cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
765
        cdef PyObject *temp_node
766
        cdef _MergeSortNode ms_node
767
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
768
        if node.extra is None:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
769
            ms_node = _MergeSortNode(node.key)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
770
            node.extra = ms_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
771
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
772
            ms_node = <_MergeSortNode>node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
773
        return ms_node
774
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
775
    cdef _push_node(self, _KnownGraphNode node, long merge_depth):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
776
        cdef _KnownGraphNode parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
777
        cdef _MergeSortNode ms_node, ms_parent_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
778
        cdef Py_ssize_t pos
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
779
4593.5.27 by John Arbash Meinel
bit of cleanup passes
780
        ms_node = self._get_ms_node(node)
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
781
        ms_node.merge_depth = merge_depth
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
782
        if node.parents is None:
783
            raise RuntimeError('ghost nodes should not be pushed'
784
                               ' onto the stack: %s' % (node,))
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
785
        if PyTuple_GET_SIZE(node.parents) > 0:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
786
            parent_node = _get_tuple_node(node.parents, 0)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
787
            ms_node.left_parent = parent_node
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
788
            if parent_node.parents is None: # left-hand ghost
789
                ms_node.left_pending_parent = None
790
                ms_node.left_parent = None
791
            else:
792
                ms_node.left_pending_parent = parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
793
        if PyTuple_GET_SIZE(node.parents) > 1:
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
794
            ms_node.pending_parents = []
795
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
796
                parent_node = _get_tuple_node(node.parents, pos)
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
797
                if parent_node.parents is None: # ghost
798
                    continue
799
                PyList_Append(ms_node.pending_parents, parent_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
800
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
801
        ms_node.is_first_child = 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
802
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
803
            ms_parent_node = self._get_ms_node(ms_node.left_parent)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
804
            if ms_parent_node.seen_by_child:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
805
                ms_node.is_first_child = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
806
            ms_parent_node.seen_by_child = 1
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
807
        self._last_stack_item = self._last_stack_item + 1
808
        if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
809
            Py_INCREF(node) # SetItem steals a ref
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
810
            PyList_SetItem(self._depth_first_stack, self._last_stack_item,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
811
                           node)
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
812
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
813
            PyList_Append(self._depth_first_stack, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
814
815
    cdef _pop_node(self):
4593.5.16 by John Arbash Meinel
Change the revno handling code.
816
        cdef PyObject *temp
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
817
        cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
818
        cdef _KnownGraphNode node, parent_node, prev_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
819
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
820
        node = _get_list_node(self._depth_first_stack, self._last_stack_item)
821
        ms_node = <_MergeSortNode>node.extra
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
822
        self._last_stack_item = self._last_stack_item - 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
823
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
824
            # Assign the revision number from the left-hand parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
825
            ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
826
            if ms_node.is_first_child:
827
                # First child just increments the final digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
828
                ms_node._revno_first = ms_parent_node._revno_first
829
                ms_node._revno_second = ms_parent_node._revno_second
830
                ms_node._revno_last = ms_parent_node._revno_last + 1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
831
            else:
4593.5.10 by John Arbash Meinel
finish getting the revno numbers working correctly.
832
                # Not the first child, make a new branch
4593.5.27 by John Arbash Meinel
bit of cleanup passes
833
                #  (mainline_revno, branch_count, 1)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
834
                if ms_parent_node._revno_first == -1:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
835
                    # Mainline ancestor, the increment is on the last digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
836
                    base_revno = ms_parent_node._revno_last
4593.5.16 by John Arbash Meinel
Change the revno handling code.
837
                else:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
838
                    base_revno = ms_parent_node._revno_first
4593.5.16 by John Arbash Meinel
Change the revno handling code.
839
                temp = PyDict_GetItem(self._revno_to_branch_count,
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
840
                                      base_revno)
4593.5.16 by John Arbash Meinel
Change the revno handling code.
841
                if temp == NULL:
842
                    branch_count = 1
843
                else:
844
                    branch_count = (<object>temp) + 1
845
                PyDict_SetItem(self._revno_to_branch_count, base_revno,
846
                               branch_count)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
847
                ms_node._revno_first = base_revno
848
                ms_node._revno_second = branch_count
849
                ms_node._revno_last = 1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
850
        else:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
851
            temp = PyDict_GetItem(self._revno_to_branch_count, 0)
852
            if temp == NULL:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
853
                # The first root node doesn't have a 3-digit revno
4593.5.27 by John Arbash Meinel
bit of cleanup passes
854
                root_count = 0
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
855
                ms_node._revno_first = -1
856
                ms_node._revno_second = -1
857
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
858
            else:
859
                root_count = (<object>temp) + 1
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
860
                ms_node._revno_first = 0
861
                ms_node._revno_second = root_count
862
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
863
            PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
864
        ms_node.completed = 1
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
865
        if PyList_GET_SIZE(self._scheduled_nodes) == 0:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
866
            # The first scheduled node is always the end of merge
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
867
            ms_node.end_of_merge = True
868
        else:
869
            prev_node = _get_list_node(self._scheduled_nodes,
870
                                    PyList_GET_SIZE(self._scheduled_nodes) - 1)
871
            ms_prev_node = <_MergeSortNode>prev_node.extra
872
            if ms_prev_node.merge_depth < ms_node.merge_depth:
873
                # The previously pushed node is to our left, so this is the end
874
                # of this right-hand chain
875
                ms_node.end_of_merge = True
876
            elif (ms_prev_node.merge_depth == ms_node.merge_depth
877
                  and prev_node not in node.parents):
878
                # The next node is not a direct parent of this node
879
                ms_node.end_of_merge = True
880
            else:
881
                ms_node.end_of_merge = False
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
882
        PyList_Append(self._scheduled_nodes, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
883
884
    cdef _schedule_stack(self):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
885
        cdef _KnownGraphNode last_node, next_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
886
        cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
887
        cdef long next_merge_depth
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
888
        ordered = []
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
889
        while self._last_stack_item >= 0:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
890
            # Peek at the last item on the stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
891
            last_node = _get_list_node(self._depth_first_stack,
892
                                       self._last_stack_item)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
893
            if last_node.gdfo == -1:
4593.5.28 by John Arbash Meinel
Cleanup pass
894
                # if _find_gdfo skipped a node, that means there is a graph
895
                # cycle, error out now
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
896
                raise errors.GraphCycleError(self.graph._nodes)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
897
            ms_last_node = <_MergeSortNode>last_node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
898
            if not ms_last_node.has_pending_parents():
899
                # Processed all parents, pop this node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
900
                self._pop_node()
901
                continue
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
902
            while ms_last_node.has_pending_parents():
903
                if ms_last_node.left_pending_parent is not None:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
904
                    # recurse depth first into the primary parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
905
                    next_node = ms_last_node.left_pending_parent
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
906
                    ms_last_node.left_pending_parent = None
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
907
                else:
908
                    # place any merges in right-to-left order for scheduling
909
                    # which gives us left-to-right order after we reverse
4593.5.28 by John Arbash Meinel
Cleanup pass
910
                    # the scheduled queue.
911
                    # Note: This has the effect of allocating common-new
912
                    #       revisions to the right-most subtree rather than the
913
                    #       left most, which will display nicely (you get
914
                    #       smaller trees at the top of the combined merge).
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
915
                    next_node = ms_last_node.pending_parents.pop()
4593.5.27 by John Arbash Meinel
bit of cleanup passes
916
                ms_next_node = self._get_ms_node(next_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
917
                if ms_next_node.completed:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
918
                    # this parent was completed by a child on the
919
                    # call stack. skip it.
920
                    continue
921
                # otherwise transfer it from the source graph into the
922
                # top of the current depth first search stack.
923
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
924
                if next_node is ms_last_node.left_parent:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
925
                    next_merge_depth = ms_last_node.merge_depth
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
926
                else:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
927
                    next_merge_depth = ms_last_node.merge_depth + 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
928
                self._push_node(next_node, next_merge_depth)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
929
                # and do not continue processing parents until this 'call'
930
                # has recursed.
931
                break
932
933
    cdef topo_order(self):
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
934
        cdef _MergeSortNode ms_node
935
        cdef _KnownGraphNode node
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
936
        cdef Py_ssize_t pos
7018.1.1 by Martin
Update _known_graph_pyx for modern cython
937
        cdef PyObject *temp_key
938
        cdef PyObject *temp_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
939
4593.5.28 by John Arbash Meinel
Cleanup pass
940
        # Note: allocating a _MergeSortNode and deallocating it for all nodes
941
        #       costs approx 8.52ms (21%) of the total runtime
942
        #       We might consider moving the attributes into the base
943
        #       KnownGraph object.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
944
        self._schedule_stack()
945
946
        # We've set up the basic schedule, now we can continue processing the
947
        # output.
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
948
        # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
949
        #       bzr.dev, to convert the internal Object representation into a
950
        #       Tuple representation...
951
        #       2ms is walking the data and computing revno tuples
952
        #       7ms is computing the return tuple
953
        #       4ms is PyList_Append()
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
954
        ordered = []
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
955
        # output the result in reverse order, and separate the generated info
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
956
        for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
957
            node = _get_list_node(self._scheduled_nodes, pos)
958
            ms_node = <_MergeSortNode>node.extra
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
959
            PyList_Append(ordered, ms_node)
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
960
            node.extra = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
961
        # Clear out the scheduled nodes now that we're done
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
962
        self._scheduled_nodes = []
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
963
        return ordered