/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
import heapq
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
21
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
22
from bzrlib import (
23
    revision,
24
    )
25
26
27
class _KnownGraphNode(object):
28
    """Represents a single object in the known graph."""
29
30
    __slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',
4371.3.48 by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway.
31
                 'gdfo', 'ancestor_of')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
32
33
    def __init__(self, key, parent_keys):
34
        self.key = key
35
        self.parent_keys = parent_keys
36
        self.child_keys = []
37
        # oldest ancestor, such that no parents between here and there have >1
38
        # child or >1 parent.
39
        self.linear_dominator = None
40
        # Greatest distance from origin
41
        self.gdfo = None
42
        # This will become a tuple of known heads that have this node as an
43
        # ancestor
44
        self.ancestor_of = None
45
46
    def __repr__(self):
4371.3.48 by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway.
47
        return '%s(%s  gdfo:%s par:%s child:%s %s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
48
            self.__class__.__name__, self.key, self.gdfo,
49
            self.parent_keys, self.child_keys,
4371.3.48 by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway.
50
            self.linear_dominator)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
51
52
53
class KnownGraph(object):
54
    """This is a class which assumes we already know the full graph."""
55
56
    def __init__(self, parent_map, do_cache=True):
57
        """Create a new KnownGraph instance.
58
59
        :param parent_map: A dictionary mapping key => parent_keys
60
        """
61
        self._nodes = {}
62
        # Maps {sorted(revision_id, revision_id): heads}
63
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
64
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
65
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
66
        self._find_linear_dominators()
67
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
68
69
    def _initialize_nodes(self, parent_map):
70
        """Populate self._nodes.
71
72
        After this has finished, self._nodes will have an entry for every entry
73
        in parent_map. Ghosts will have a parent_keys = None, all nodes found
74
        will also have .child_keys populated with all known child_keys.
75
        """
76
        nodes = self._nodes
77
        for key, parent_keys in parent_map.iteritems():
78
            if key in nodes:
79
                node = nodes[key]
80
                node.parent_keys = parent_keys
81
            else:
82
                node = _KnownGraphNode(key, parent_keys)
83
                nodes[key] = node
84
            for parent_key in parent_keys:
85
                try:
86
                    parent_node = nodes[parent_key]
87
                except KeyError:
88
                    parent_node = _KnownGraphNode(parent_key, None)
89
                    nodes[parent_key] = parent_node
90
                parent_node.child_keys.append(key)
91
92
    def _find_linear_dominators(self):
93
        """For each node in the set, find any linear dominators.
94
95
        For any given node, the 'linear dominator' is an ancestor, such that
96
        all parents between this node and that one have a single parent, and a
97
        single child. So if A->B->C->D then B,C,D all have a linear dominator
4371.3.41 by John Arbash Meinel
Add a bit of discussion about why we would want to use linear dominators.
98
        of A.
99
100
        There are two main benefits:
101
        1) When walking the graph, we can jump to the nearest linear dominator,
102
           rather than walking all of the nodes inbetween.
103
        2) When caching heads() results, dominators give the "same" results as
104
           their children. (If the dominator is a head, then the descendant is
105
           a head, if the dominator is not a head, then the child isn't
106
           either.)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
107
        """
108
        def check_node(node):
109
            if node.parent_keys is None or len(node.parent_keys) != 1:
110
                # This node is either a ghost, a tail, or has multiple parents
111
                # It its own dominator
112
                node.linear_dominator = node.key
113
                return None
114
            parent_node = self._nodes[node.parent_keys[0]]
115
            if len(parent_node.child_keys) > 1:
116
                # The parent has multiple children, so *this* node is the
117
                # dominator
118
                node.linear_dominator = node.key
119
                return None
120
            # The parent is already filled in, so add and continue
121
            if parent_node.linear_dominator is not None:
122
                node.linear_dominator = parent_node.linear_dominator
123
                return None
124
            # We don't know this node, or its parent node, so start walking to
125
            # next
126
            return parent_node
127
128
        for node in self._nodes.itervalues():
129
            # The parent is not filled in, so walk until we get somewhere
130
            if node.linear_dominator is not None: #already done
131
                continue
132
            next_node = check_node(node)
133
            if next_node is None:
134
                # Nothing more needs to be done
135
                continue
136
            stack = []
137
            while next_node is not None:
138
                stack.append(node)
139
                node = next_node
140
                next_node = check_node(node)
141
            # The stack now contains the linear chain, and 'node' should have
142
            # been labeled
143
            dominator = node.linear_dominator
144
            while stack:
145
                next_node = stack.pop()
146
                next_node.linear_dominator = dominator
147
                node = next_node
148
149
    def _find_gdfo(self):
150
        def find_tails():
151
            return [node for node in self._nodes.itervalues()
152
                       if not node.parent_keys]
153
        tails = find_tails()
154
        todo = []
155
        heappush = heapq.heappush
156
        heappop = heapq.heappop
157
        nodes = self._nodes
158
        for node in tails:
159
            node.gdfo = 1
160
            heappush(todo, (1, node))
161
        processed = 0
162
        while todo:
163
            gdfo, next = heappop(todo)
164
            processed += 1
165
            if next.gdfo is not None and gdfo < next.gdfo:
166
                # This node was reached from a longer path, we assume it was
167
                # enqued correctly with the longer gdfo, so don't continue
168
                # processing now
169
                continue
170
            next_gdfo = gdfo + 1
171
            for child_key in next.child_keys:
172
                child_node = nodes[child_key]
173
                if child_node.gdfo is None or child_node.gdfo < next_gdfo:
174
                    # Only enque children when all of their parents have been
175
                    # resolved
176
                    for parent_key in child_node.parent_keys:
177
                        # We know that 'this' parent is counted
178
                        if parent_key != next.key:
179
                            parent_node = nodes[parent_key]
180
                            if parent_node.gdfo is None:
181
                                break
182
                    else:
183
                        child_node.gdfo = next_gdfo
184
                        heappush(todo, (next_gdfo, child_node))
185
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
186
    def _get_dominators_to_nodes(self, candidate_nodes):
187
        """Get the reverse mapping from dominator_key => candidate_nodes.
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
188
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
189
        As a side effect, this can also remove potential candidate nodes if we
190
        determine that they share a dominator.
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
191
        """
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
192
        dom_to_node = {}
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
193
        keys_to_remove = []
194
        for node in candidate_nodes.values():
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
195
            if node.linear_dominator in dom_to_node:
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
196
                # This node already exists, resolve which node supersedes the
197
                # other
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
198
                other_node = dom_to_node[node.linear_dominator]
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
199
                # There should be no way that nodes sharing a dominator could
200
                # 'tie' for gdfo
201
                if other_node.gdfo > node.gdfo:
202
                    # The other node has this node as an ancestor
203
                    keys_to_remove.append(node.key)
204
                else:
205
                    # Replace the other node, and set this as the new key
206
                    keys_to_remove.append(other_node.key)
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
207
                    dom_to_node[node.linear_dominator] = node
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
208
            else:
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
209
                dom_to_node[node.linear_dominator] = node
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
210
        for key in keys_to_remove:
211
            candidate_nodes.pop(key)
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
212
        return dom_to_node
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
213
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
214
    def heads(self, keys):
215
        """Return the heads from amongst keys.
216
217
        This is done by searching the ancestries of each key.  Any key that is
218
        reachable from another key is not returned; all the others are.
219
220
        This operation scales with the relative depth between any two keys. If
221
        any two keys are completely disconnected all ancestry of both sides
222
        will be retrieved.
223
224
        :param keys: An iterable of keys.
225
        :return: A set of the heads. Note that as a set there is no ordering
226
            information. Callers will need to filter their input to create
227
            order if they need it.
228
        """
229
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
230
        if revision.NULL_REVISION in candidate_nodes:
231
            # NULL_REVISION is only a head if it is the only entry
232
            candidate_nodes.pop(revision.NULL_REVISION)
233
            if not candidate_nodes:
234
                return set([revision.NULL_REVISION])
235
        if len(candidate_nodes) < 2:
236
            return frozenset(candidate_nodes)
237
        heads_key = frozenset(candidate_nodes)
238
        if heads_key != frozenset(keys):
239
            note('%s != %s', heads_key, frozenset(keys))
240
        try:
241
            heads = self._known_heads[heads_key]
242
            return heads
243
        except KeyError:
244
            pass # compute it ourselves
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
245
        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
246
        if len(candidate_nodes) < 2:
247
            # We shrunk candidate_nodes and determined a new head
248
            return frozenset(candidate_nodes)
249
        dom_heads_key = None
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
250
        # Check the linear dominators of these keys, to see if we already
251
        # know the heads answer
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
252
        dom_heads_key = frozenset([node.linear_dominator
253
                                   for node in candidate_nodes.itervalues()])
254
        if dom_heads_key in self._known_heads:
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
255
            # map back into the original keys
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
256
            heads = self._known_heads[dom_heads_key]
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
257
            heads = frozenset([dom_to_node[key].key for key in heads])
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
258
            return heads
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
259
        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
260
        if self.do_cache:
261
            self._known_heads[heads_key] = heads
262
            # Cache the dominator heads
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
263
            if dom_heads_key is not None:
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
264
                dom_heads = frozenset([candidate_nodes[key].linear_dominator
265
                                       for key in heads])
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
266
                self._known_heads[dom_heads_key] = dom_heads
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
267
        return heads
268
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
269
    def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
270
        queue = []
271
        to_cleanup = []
272
        to_cleanup_append = to_cleanup.append
273
        for node in candidate_nodes.itervalues():
274
            node.ancestor_of = (node.key,)
275
            queue.append((-node.gdfo, node))
276
            to_cleanup_append(node)
277
        heapq.heapify(queue)
278
        # These are nodes that we determined are 'common' that we are no longer
279
        # walking
280
        # Now we walk nodes until all nodes that are being walked are 'common'
281
        num_candidates = len(candidate_nodes)
282
        nodes = self._nodes
283
        heappop = heapq.heappop
284
        heappush = heapq.heappush
285
        while queue and len(candidate_nodes) > 1:
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
286
            _, node = heappop(queue)
287
            next_ancestor_of = node.ancestor_of
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
288
            if len(next_ancestor_of) == num_candidates:
289
                # This node is now considered 'common'
290
                # Make sure all parent nodes are marked as such
291
                for parent_key in node.parent_keys:
292
                    parent_node = nodes[parent_key]
293
                    if parent_node.ancestor_of is not None:
294
                        parent_node.ancestor_of = next_ancestor_of
295
                if node.linear_dominator != node.key:
296
                    parent_node = nodes[node.linear_dominator]
297
                    if parent_node.ancestor_of is not None:
298
                        parent_node.ancestor_of = next_ancestor_of
299
                continue
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
300
            if node.parent_keys is None:
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
301
                # This is a ghost
302
                continue
303
            # Now project the current nodes ancestor list to the parent nodes,
304
            # and queue them up to be walked
305
            # Note: using linear_dominator speeds things up quite a bit
306
            #       enough that we actually start to be slightly faster
307
            #       than the default heads() implementation
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
308
            if node.linear_dominator != node.key:
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
309
                # We are at the tip of a long linear region
310
                # We know that there is nothing between here and the tail
311
                # that is interesting, so skip to the end
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
312
                parent_keys = [node.linear_dominator]
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
313
            else:
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
314
                parent_keys = node.parent_keys
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
315
            for parent_key in parent_keys:
316
                if parent_key in candidate_nodes:
317
                    candidate_nodes.pop(parent_key)
318
                    if len(candidate_nodes) <= 1:
319
                        break
4371.3.40 by John Arbash Meinel
Fixes for linear_nodes in the pyrex version.
320
                elif parent_key in dom_to_node:
321
                    orig_node = dom_to_node[parent_key]
322
                    if orig_node is not node:
323
                        if orig_node.key in candidate_nodes:
324
                            candidate_nodes.pop(orig_node.key)
4371.3.39 by John Arbash Meinel
Implement the fix for the python version.
325
                            if len(candidate_nodes) <= 1:
326
                                break
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
327
                parent_node = nodes[parent_key]
328
                ancestor_of = parent_node.ancestor_of
329
                if ancestor_of is None:
330
                    # This node hasn't been walked yet
331
                    parent_node.ancestor_of = next_ancestor_of
332
                    # Enqueue this node
333
                    heappush(queue, (-parent_node.gdfo, parent_node))
334
                    to_cleanup_append(parent_node)
335
                elif ancestor_of != next_ancestor_of:
336
                    # Combine to get the full set of parents
337
                    all_ancestors = set(ancestor_of)
338
                    all_ancestors.update(next_ancestor_of)
339
                    parent_node.ancestor_of = tuple(sorted(all_ancestors))
340
        def cleanup():
341
            for node in to_cleanup:
342
                node.ancestor_of = None
343
        cleanup()
344
        return frozenset(candidate_nodes)