/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
cdef extern from "python-compat.h":
21
    pass
22
23
cdef extern from "Python.h":
24
    ctypedef int Py_ssize_t
25
    ctypedef struct PyObject:
26
        pass
27
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
28
    object PyFrozenSet_New(object)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
29
    object PyTuple_New(Py_ssize_t n)
30
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
31
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
32
    Py_ssize_t PyTuple_GET_SIZE(object t)
33
    PyObject * PyDict_GetItem(object d, object k)
34
    Py_ssize_t PyDict_Size(object d) except -1
35
    int PyDict_CheckExact(object d)
36
    int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
37
    int PyList_Append(object l, object v) except -1
38
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
39
    Py_ssize_t PyList_GET_SIZE(object l)
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
40
    int PyDict_SetItem(object d, object k, object v) except -1
4371.3.23 by John Arbash Meinel
Using PySet_Add in the inner loop is a little better, not tremendously, though
41
    int PySet_Add(object s, object k) except -1
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
42
    void Py_INCREF(object)
43
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
44
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
45
import heapq
46
47
from bzrlib import revision
48
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
49
# Define these as cdef objects, so we don't have to getattr them later
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
50
cdef object heappush, heappop, heapify, heapreplace
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
51
heappush = heapq.heappush
52
heappop = heapq.heappop
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
53
heapify = heapq.heapify
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
54
heapreplace = heapq.heapreplace
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
55
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
56
57
cdef class _KnownGraphNode:
58
    """Represents a single object in the known graph."""
59
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
60
    cdef object key
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
61
    cdef object parents
62
    cdef object children
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
63
    cdef _KnownGraphNode linear_dominator_node
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
64
    cdef public object gdfo # Int
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
65
    # This could also be simplified
66
    cdef object ancestor_of
67
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
68
    def __init__(self, key):
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
69
        cdef int i
70
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
71
        self.key = key
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
72
        self.parents = None
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
73
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
74
        self.children = []
75
        # oldest ancestor, such that no parents between here and there have >1
76
        # child or >1 parent.
77
        self.linear_dominator_node = None
78
        # Greatest distance from origin
79
        self.gdfo = -1
80
        # This will become a tuple of known heads that have this node as an
81
        # ancestor
82
        self.ancestor_of = None
83
84
    property child_keys:
85
        def __get__(self):
86
            cdef _KnownGraphNode child
87
88
            keys = []
89
            for child in self.children:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
90
                PyList_Append(keys, child.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
91
            return keys
92
93
    property linear_dominator:
94
        def __get__(self):
95
            if self.linear_dominator_node is None:
96
                return None
97
            else:
98
                return self.linear_dominator_node.key
99
100
    cdef clear_references(self):
101
        self.parents = None
102
        self.children = None
103
        self.linear_dominator_node = None
104
105
    def __repr__(self):
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
106
        cdef _KnownGraphNode node
107
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
108
        parent_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
109
        if self.parents is not None:
110
            for node in self.parents:
111
                parent_keys.append(node.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
112
        child_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
113
        if self.children is not None:
114
            for node in self.children:
115
                child_keys.append(node.key)
4371.3.48 by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway.
116
        return '%s(%s  gdfo:%s par:%s child:%s %s)' % (
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
117
            self.__class__.__name__, self.key, self.gdfo,
118
            parent_keys, child_keys,
4371.3.48 by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway.
119
            self.linear_dominator)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
120
121
4371.3.37 by John Arbash Meinel
cleanup passes.
122
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
123
    cdef PyObject *temp_node
124
125
    temp_node = PyList_GET_ITEM(lst, pos)
126
    return <_KnownGraphNode>temp_node
127
128
129
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
130
    cdef PyObject *temp_node
131
    cdef _KnownGraphNode node
132
133
    temp_node = PyTuple_GET_ITEM(parents, pos)
134
    return <_KnownGraphNode>temp_node
135
136
137
cdef _KnownGraphNode _peek_node(queue):
138
    cdef PyObject *temp_node
139
    cdef _KnownGraphNode node
140
141
    temp_node = PyTuple_GET_ITEM(<object>PyList_GET_ITEM(queue, 0), 1)
142
    node = <_KnownGraphNode>temp_node
143
    return node
144
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
145
# TODO: slab allocate all _KnownGraphNode objects.
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
146
#       We already know how many we are going to need, except for a couple of
147
#       ghosts that could be allocated on demand.
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
148
149
cdef class KnownGraph:
150
    """This is a class which assumes we already know the full graph."""
151
152
    cdef public object _nodes
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
153
    cdef object _known_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
154
    cdef public int do_cache
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
155
    # Nodes we've touched that we'll need to reset their info when heads() is
156
    # done
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
157
    cdef object _to_cleanup
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
158
159
    def __init__(self, parent_map, do_cache=True):
160
        """Create a new KnownGraph instance.
161
162
        :param parent_map: A dictionary mapping key => parent_keys
163
        """
164
        self._nodes = {}
165
        # Maps {sorted(revision_id, revision_id): heads}
166
        self._known_heads = {}
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
167
        self._to_cleanup = []
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
168
        self.do_cache = int(do_cache)
169
        self._initialize_nodes(parent_map)
170
        self._find_linear_dominators()
171
        self._find_gdfo()
172
173
    def __dealloc__(self):
174
        cdef _KnownGraphNode child
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
175
        cdef Py_ssize_t pos
176
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
177
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
178
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
179
            child = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
180
            child.clear_references()
181
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
182
    cdef _KnownGraphNode _get_or_create_node(self, key):
183
        cdef PyObject *temp_node
184
        cdef _KnownGraphNode node
185
186
        temp_node = PyDict_GetItem(self._nodes, key)
187
        if temp_node == NULL:
188
            node = _KnownGraphNode(key)
189
            PyDict_SetItem(self._nodes, key, node)
190
        else:
191
            node = <_KnownGraphNode>temp_node
192
        return node
193
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
194
    def _initialize_nodes(self, parent_map):
195
        """Populate self._nodes.
196
197
        After this has finished, self._nodes will have an entry for every entry
198
        in parent_map. Ghosts will have a parent_keys = None, all nodes found
199
        will also have .child_keys populated with all known child_keys.
200
        """
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
201
        cdef PyObject *temp_key, *temp_parent_keys, *temp_node
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
202
        cdef Py_ssize_t pos, pos2, num_parent_keys
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
203
        cdef _KnownGraphNode node
204
        cdef _KnownGraphNode parent_node
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
205
206
        nodes = self._nodes
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
207
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
208
        if not PyDict_CheckExact(parent_map):
209
            raise TypeError('parent_map should be a dict of {key:parent_keys}')
210
        # for key, parent_keys in parent_map.iteritems():
211
        pos = 0
212
        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
213
            key = <object>temp_key
214
            parent_keys = <object>temp_parent_keys
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
215
            node = self._get_or_create_node(key)
216
            # We know how many parents, so we could pre allocate an exact sized
217
            # tuple here
218
            num_parent_keys = len(parent_keys)
219
            parent_nodes = PyTuple_New(num_parent_keys)
220
            # We use iter here, because parent_keys maybe be a list or tuple
221
            for pos2 from 0 <= pos2 < num_parent_keys:
222
                parent_key = parent_keys[pos2]
223
                parent_node = self._get_or_create_node(parent_keys[pos2])
224
                # PyTuple_SET_ITEM will steal a reference, so INCREF first
225
                Py_INCREF(parent_node)
226
                PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
227
                PyList_Append(parent_node.children, node)
228
            node.parents = parent_nodes
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
229
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
230
    cdef _KnownGraphNode _check_is_linear(self, _KnownGraphNode node):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
231
        """Check to see if a given node is part of a linear chain."""
232
        cdef _KnownGraphNode parent_node
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
233
        if node.parents is None or PyTuple_GET_SIZE(node.parents) != 1:
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
234
            # This node is either a ghost, a tail, or has multiple parents
235
            # It its own dominator
236
            node.linear_dominator_node = node
237
            return None
4371.3.37 by John Arbash Meinel
cleanup passes.
238
        parent_node = _get_parent(node.parents, 0)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
239
        if PyList_GET_SIZE(parent_node.children) > 1:
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
240
            # The parent has multiple children, so *this* node is the
241
            # dominator
242
            node.linear_dominator_node = node
243
            return None
244
        # The parent is already filled in, so add and continue
245
        if parent_node.linear_dominator_node is not None:
246
            node.linear_dominator_node = parent_node.linear_dominator_node
247
            return None
248
        # We don't know this node, or its parent node, so start walking to
249
        # next
250
        return parent_node
251
252
    def _find_linear_dominators(self):
4371.3.41 by John Arbash Meinel
Add a bit of discussion about why we would want to use linear dominators.
253
        """
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
254
        For any given node, the 'linear dominator' is an ancestor, such that
255
        all parents between this node and that one have a single parent, and a
256
        single child. So if A->B->C->D then B,C,D all have a linear dominator
4371.3.41 by John Arbash Meinel
Add a bit of discussion about why we would want to use linear dominators.
257
        of A.
258
259
        There are two main benefits:
260
        1) When walking the graph, we can jump to the nearest linear dominator,
261
           rather than walking all of the nodes inbetween.
262
        2) When caching heads() results, dominators give the "same" results as
263
           their children. (If the dominator is a head, then the descendant is
264
           a head, if the dominator is not a head, then the child isn't
265
           either.)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
266
        """
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
267
        cdef PyObject *temp_node
268
        cdef Py_ssize_t pos
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
269
        cdef _KnownGraphNode node
270
        cdef _KnownGraphNode next_node
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
271
        cdef _KnownGraphNode dominator
272
        cdef int i, num_elements
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
273
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
274
        pos = 0
275
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
276
            node = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
277
            # The parent is not filled in, so walk until we get somewhere
278
            if node.linear_dominator_node is not None: #already done
279
                continue
280
            next_node = self._check_is_linear(node)
281
            if next_node is None:
282
                # Nothing more needs to be done
283
                continue
284
            stack = []
285
            while next_node is not None:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
286
                PyList_Append(stack, node)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
287
                node = next_node
288
                next_node = self._check_is_linear(node)
289
            # The stack now contains the linear chain, and 'node' should have
290
            # been labeled
291
            dominator = node.linear_dominator_node
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
292
            num_elements = len(stack)
293
            for i from num_elements > i >= 0:
4371.3.37 by John Arbash Meinel
cleanup passes.
294
                next_node = _get_list_node(stack, i)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
295
                next_node.linear_dominator_node = dominator
296
                node = next_node
297
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
298
    cdef object _find_tails(self):
299
        cdef object tails
300
        cdef PyObject *temp_node
301
        cdef Py_ssize_t pos
302
        cdef _KnownGraphNode node
303
304
        tails = []
305
        pos = 0
306
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
307
            node = <_KnownGraphNode>temp_node
308
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
309
                PyList_Append(tails, node)
310
        return tails
311
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
312
    def _find_gdfo(self):
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
313
        cdef Py_ssize_t pos, pos2
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
314
        cdef _KnownGraphNode node
315
        cdef _KnownGraphNode child_node
316
        cdef _KnownGraphNode parent_node
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
317
        cdef int replace_node, missing_parent
318
319
        tails = self._find_tails()
320
        todo = []
321
        for pos from 0 <= pos < PyList_GET_SIZE(tails):
4371.3.37 by John Arbash Meinel
cleanup passes.
322
            node = _get_list_node(tails, pos)
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
323
            node.gdfo = 1
324
            PyList_Append(todo, (1, node))
4371.3.36 by John Arbash Meinel
Cache heads() results from dominator lookups.
325
        # No need to heapify, because all tails have priority=1
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
326
        while PyList_GET_SIZE(todo) > 0:
4371.3.37 by John Arbash Meinel
cleanup passes.
327
            node = _peek_node(todo)
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
328
            next_gdfo = node.gdfo + 1
329
            replace_node = 1
330
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
4371.3.37 by John Arbash Meinel
cleanup passes.
331
                child_node = _get_list_node(node.children, pos)
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
332
                # We should never have numbered children before we numbered
333
                # a parent
334
                if child_node.gdfo != -1:
335
                    continue
336
                # Only enque children when all of their parents have been
337
                # resolved. With a single parent, we can just take 'this' value
338
                child_gdfo = next_gdfo
339
                if PyTuple_GET_SIZE(child_node.parents) > 1:
340
                    missing_parent = 0
341
                    for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(child_node.parents):
4371.3.37 by John Arbash Meinel
cleanup passes.
342
                        parent_node = _get_parent(child_node.parents, pos2)
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
343
                        if parent_node.gdfo == -1:
344
                            missing_parent = 1
345
                            break
346
                        if parent_node.gdfo >= child_gdfo:
347
                            child_gdfo = parent_node.gdfo + 1
348
                    if missing_parent:
349
                        # One of the parents is not numbered, so wait until we get
350
                        # back here
351
                        continue
352
                child_node.gdfo = child_gdfo
353
                if replace_node:
354
                    heapreplace(todo, (child_gdfo, child_node))
355
                    replace_node = 0
356
                else:
357
                    heappush(todo, (child_gdfo, child_node))
358
            if replace_node:
359
                heappop(todo)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
360
361
    def heads(self, keys):
362
        """Return the heads from amongst keys.
363
364
        This is done by searching the ancestries of each key.  Any key that is
365
        reachable from another key is not returned; all the others are.
366
367
        This operation scales with the relative depth between any two keys. If
368
        any two keys are completely disconnected all ancestry of both sides
369
        will be retrieved.
370
371
        :param keys: An iterable of keys.
372
        :return: A set of the heads. Note that as a set there is no ordering
373
            information. Callers will need to filter their input to create
374
            order if they need it.
375
        """
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
376
        cdef PyObject *maybe_node
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
377
        cdef PyObject *maybe_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
378
4371.3.24 by John Arbash Meinel
A couple of cleanups.
379
        heads_key = PyFrozenSet_New(keys)
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
380
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
381
        if maybe_heads != NULL:
382
            return <object>maybe_heads
383
384
        # Not cached, compute it ourselves
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
385
        candidate_nodes = {}
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
386
        nodes = self._nodes
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
387
        for key in keys:
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
388
            maybe_node = PyDict_GetItem(nodes, key)
389
            if maybe_node == NULL:
390
                raise KeyError('key %s not in nodes' % (key,))
391
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
392
        if revision.NULL_REVISION in candidate_nodes:
393
            # NULL_REVISION is only a head if it is the only entry
394
            candidate_nodes.pop(revision.NULL_REVISION)
395
            if not candidate_nodes:
396
                return set([revision.NULL_REVISION])
4371.3.24 by John Arbash Meinel
A couple of cleanups.
397
            # The keys changed, so recalculate heads_key
398
            heads_key = PyFrozenSet_New(candidate_nodes)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
399
        if len(candidate_nodes) < 2:
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
400
            return heads_key
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
401
        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
402
        if PyDict_Size(candidate_nodes) < 2:
403
            return frozenset(candidate_nodes)
404
        dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes,
405
                                                            dom_to_node)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
406
        if heads is not None:
4371.3.36 by John Arbash Meinel
Cache heads() results from dominator lookups.
407
            if self.do_cache:
408
                # This heads was not in the cache, or it would have been caught
409
                # earlier, but the dom head *was*, so do the simple cache
410
                PyDict_SetItem(self._known_heads, heads_key, heads)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
411
            return heads
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
412
        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
413
        if self.do_cache:
414
            self._cache_heads(heads, heads_key, dom_lookup_key, candidate_nodes)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
415
        return heads
416
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
417
    cdef object _cache_heads(self, heads, heads_key, dom_lookup_key,
418
                             candidate_nodes):
419
        cdef PyObject *maybe_node
420
        cdef _KnownGraphNode node
421
422
        PyDict_SetItem(self._known_heads, heads_key, heads)
423
        dom_heads = []
424
        for key in heads:
425
            maybe_node = PyDict_GetItem(candidate_nodes, key)
426
            if maybe_node == NULL:
427
                raise KeyError
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
428
            node = <_KnownGraphNode>maybe_node
429
            PyList_Append(dom_heads, node.linear_dominator_node.key)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
430
        PyDict_SetItem(self._known_heads, dom_lookup_key,
431
                       PyFrozenSet_New(dom_heads))
432
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
433
    cdef _get_dominators_to_nodes(self, candidate_nodes):
434
        """Get the reverse mapping from dominator_key => candidate_nodes.
435
436
        As a side effect, this can also remove potential candidate nodes if we
437
        determine that they share a dominator.
438
        """
439
        cdef Py_ssize_t pos
440
        cdef _KnownGraphNode node, other_node
441
        cdef PyObject *temp_node
442
        cdef PyObject *maybe_node
443
444
        dom_to_node = {}
445
        keys_to_remove = []
446
        pos = 0
447
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
448
            node = <_KnownGraphNode>temp_node
449
            dom_key = node.linear_dominator_node.key
450
            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
451
            if maybe_node == NULL:
452
                PyDict_SetItem(dom_to_node, dom_key, node)
453
            else:
454
                other_node = <_KnownGraphNode>maybe_node
455
                # These nodes share a dominator, one of them obviously
456
                # supersedes the other, figure out which
457
                if other_node.gdfo > node.gdfo:
458
                    PyList_Append(keys_to_remove, node.key)
459
                else:
460
                    # This wins, replace the other
461
                    PyList_Append(keys_to_remove, other_node.key)
462
                    PyDict_SetItem(dom_to_node, dom_key, node)
463
        for pos from 0 <= pos < PyList_GET_SIZE(keys_to_remove):
464
            key = <object>PyList_GET_ITEM(keys_to_remove, pos)
465
            candidate_nodes.pop(key)
466
        return dom_to_node
467
468
    cdef object _heads_from_dominators(self, candidate_nodes, dom_to_node):
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
469
        cdef PyObject *maybe_heads
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
470
        cdef PyObject *maybe_node
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
471
        cdef _KnownGraphNode node
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
472
        cdef Py_ssize_t pos
473
        cdef PyObject *temp_node
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
474
475
        dom_list_key = []
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
476
        pos = 0
477
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
478
            node = <_KnownGraphNode>temp_node
479
            PyList_Append(dom_list_key, node.linear_dominator_node.key)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
480
        dom_lookup_key = PyFrozenSet_New(dom_list_key)
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
481
        maybe_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
482
        if maybe_heads == NULL:
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
483
            return dom_lookup_key, None
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
484
        # We need to map back from the dominator head to the original keys
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
485
        dom_heads = <object>maybe_heads
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
486
        heads = []
487
        for dom_key in dom_heads:
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
488
            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
489
            if maybe_node == NULL:
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
490
                # Should never happen
491
                raise KeyError
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
492
            node = <_KnownGraphNode>maybe_node
493
            PyList_Append(heads, node.key)
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
494
        return dom_lookup_key, PyFrozenSet_New(heads)
495
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
496
    cdef int _process_parent(self, _KnownGraphNode node,
497
                             _KnownGraphNode parent_node,
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
498
                             candidate_nodes, dom_to_node,
4371.3.37 by John Arbash Meinel
cleanup passes.
499
                             queue, int *replace_item) except -1:
500
        """Process the parent of a node, seeing if we need to walk it."""
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
501
        cdef PyObject *maybe_candidate
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
502
        cdef PyObject *maybe_node
503
        cdef _KnownGraphNode dom_child_node
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
504
        maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
505
        if maybe_candidate != NULL:
506
            candidate_nodes.pop(parent_node.key)
4371.3.37 by John Arbash Meinel
cleanup passes.
507
            # We could pass up a flag that tells the caller to stop processing,
508
            # but it doesn't help much, and makes the code uglier
509
            return 0
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
510
        maybe_node = PyDict_GetItem(dom_to_node, parent_node.key)
511
        if maybe_node != NULL:
512
            # This is a dominator of a node
513
            dom_child_node = <_KnownGraphNode>maybe_node
514
            if dom_child_node is not node:
515
                # It isn't a dominator of a node we are searching, so we should
516
                # remove it from the search
517
                maybe_candidate = PyDict_GetItem(candidate_nodes, dom_child_node.key)
518
                if maybe_candidate != NULL:
519
                    candidate_nodes.pop(dom_child_node.key)
520
                    return 0
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
521
        if parent_node.ancestor_of is None:
522
            # This node hasn't been walked yet, so just project node's ancestor
523
            # info directly to parent_node, and enqueue it for later processing
524
            parent_node.ancestor_of = node.ancestor_of
4371.3.37 by John Arbash Meinel
cleanup passes.
525
            if replace_item[0]:
526
                heapreplace(queue, (-parent_node.gdfo, parent_node))
527
                replace_item[0] = 0
528
            else:
529
                heappush(queue, (-parent_node.gdfo, parent_node))
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
530
            PyList_Append(self._to_cleanup, parent_node)
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
531
        elif parent_node.ancestor_of != node.ancestor_of:
532
            # Combine to get the full set of parents
533
            # Rewrite using PySet_* functions, unfortunately you have to use
534
            # PySet_Add since there is no PySet_Update... :(
535
            all_ancestors = set(parent_node.ancestor_of)
4371.3.23 by John Arbash Meinel
Using PySet_Add in the inner loop is a little better, not tremendously, though
536
            for k in node.ancestor_of:
537
                PySet_Add(all_ancestors, k)
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
538
            parent_node.ancestor_of = tuple(sorted(all_ancestors))
539
        return 0
540
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
541
    cdef object _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
542
        cdef _KnownGraphNode node
543
        cdef _KnownGraphNode parent_node
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
544
        cdef Py_ssize_t num_candidates
4371.3.37 by John Arbash Meinel
cleanup passes.
545
        cdef int num_parents, replace_item
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
546
        cdef Py_ssize_t pos
547
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
548
549
        queue = []
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
550
        pos = 0
551
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
552
            node = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
553
            node.ancestor_of = (node.key,)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
554
            PyList_Append(queue, (-node.gdfo, node))
555
            PyList_Append(self._to_cleanup, node)
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
556
        heapify(queue)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
557
        # These are nodes that we determined are 'common' that we are no longer
558
        # walking
559
        # Now we walk nodes until all nodes that are being walked are 'common'
560
        num_candidates = len(candidate_nodes)
4371.3.37 by John Arbash Meinel
cleanup passes.
561
        replace_item = 0
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
562
        while PyList_GET_SIZE(queue) > 0 and PyDict_Size(candidate_nodes) > 1:
4371.3.37 by John Arbash Meinel
cleanup passes.
563
            if replace_item:
564
                # We still need to pop the smallest member out of the queue
565
                # before we peek again
566
                heappop(queue)
567
                if PyList_GET_SIZE(queue) == 0:
568
                    break
569
            # peek at the smallest item. We don't pop, because we expect we'll
570
            # need to push more things into the queue anyway
571
            node = _peek_node(queue)
572
            replace_item = 1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
573
            if PyTuple_GET_SIZE(node.ancestor_of) == num_candidates:
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
574
                # This node is now considered 'common'
575
                # Make sure all parent nodes are marked as such
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
576
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
4371.3.37 by John Arbash Meinel
cleanup passes.
577
                    parent_node = _get_parent(node.parents, pos)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
578
                    if parent_node.ancestor_of is not None:
579
                        parent_node.ancestor_of = node.ancestor_of
580
                if node.linear_dominator_node is not node:
581
                    parent_node = node.linear_dominator_node
582
                    if parent_node.ancestor_of is not None:
583
                        parent_node.ancestor_of = node.ancestor_of
584
                continue
585
            if node.parents is None:
586
                # This is a ghost
587
                continue
588
            # Now project the current nodes ancestor list to the parent nodes,
589
            # and queue them up to be walked
590
            if node.linear_dominator_node is not node:
591
                # We are at the tip of a long linear region
592
                # We know that there is nothing between here and the tail
593
                # that is interesting, so skip to the end
4371.3.37 by John Arbash Meinel
cleanup passes.
594
                self._process_parent(node, node.linear_dominator_node,
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
595
                                     candidate_nodes, dom_to_node, queue, &replace_item)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
596
            else:
4371.3.37 by John Arbash Meinel
cleanup passes.
597
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
598
                    parent_node = _get_parent(node.parents, pos)
599
                    self._process_parent(node, parent_node, candidate_nodes,
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
600
                                         dom_to_node, queue, &replace_item)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
601
        for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
4371.3.37 by John Arbash Meinel
cleanup passes.
602
            node = _get_list_node(self._to_cleanup, pos)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
603
            node.ancestor_of = None
4371.3.22 by John Arbash Meinel
The slowdown was that _to_cleanup wasn't getting reset.
604
        self._to_cleanup = []
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
605
        return PyFrozenSet_New(candidate_nodes)