bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1570.1.7
by Robert Collins
 Replace the slow topo_sort routine with a much faster one for non trivial datasets.  | 
1  | 
# (C) 2005, 2006 Canonical Limited.
 | 
| 
1185.16.113
by mbp at sourcefrog
 Add topo_sort utility function  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
| 
1570.1.7
by Robert Collins
 Replace the slow topo_sort routine with a much faster one for non trivial datasets.  | 
17  | 
|
18  | 
"""Topological sorting routines."""
 | 
|
19  | 
||
20  | 
||
21  | 
import bzrlib.errors as errors  | 
|
22  | 
||
| 
1185.16.114
by mbp at sourcefrog
 Improved topological sort  | 
23  | 
|
24  | 
def topo_sort(graph):  | 
|
| 
1185.16.113
by mbp at sourcefrog
 Add topo_sort utility function  | 
25  | 
"""Topological sort a graph.  | 
26  | 
||
| 
1185.16.114
by mbp at sourcefrog
 Improved topological sort  | 
27  | 
    graph -- sequence of pairs of node->parents_list.
 | 
28  | 
||
29  | 
    The result is a list of node names, such that all parents come before
 | 
|
30  | 
    their children.
 | 
|
31  | 
||
32  | 
    Nodes at the same depth are returned in sorted order.
 | 
|
| 
1185.16.113
by mbp at sourcefrog
 Add topo_sort utility function  | 
33  | 
|
34  | 
    node identifiers can be any hashable object, and are typically strings.
 | 
|
35  | 
    """
 | 
|
| 
1570.1.7
by Robert Collins
 Replace the slow topo_sort routine with a much faster one for non trivial datasets.  | 
36  | 
return TopoSorter(graph).sorted()  | 
37  | 
||
38  | 
||
39  | 
class TopoSorter(object):  | 
|
40  | 
||
41  | 
def __init__(self, graph):  | 
|
42  | 
"""Topological sorting of a graph.  | 
|
43  | 
    
 | 
|
44  | 
        :param graph: sequence of pairs of node_name->parent_names_list.
 | 
|
45  | 
                      i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
 | 
|
46  | 
                      For this input the output from the sort or
 | 
|
47  | 
                      iter_topo_order routines will be:
 | 
|
48  | 
                      'A', 'B', 'C'
 | 
|
49  | 
        
 | 
|
50  | 
        node identifiers can be any hashable object, and are typically strings.
 | 
|
51  | 
||
52  | 
        The graph is sorted lazily: until you iterate or sort the input is
 | 
|
53  | 
        not processed other than to create an internal representation.
 | 
|
54  | 
||
55  | 
        iteration or sortign may raise GraphCycleError if a cycle is present 
 | 
|
56  | 
        in the graph.
 | 
|
57  | 
        """
 | 
|
58  | 
        # a dict of the graph.
 | 
|
59  | 
self._graph = dict(graph)  | 
|
60  | 
        ### if debugging:
 | 
|
61  | 
        # self._original_graph = dict(graph)
 | 
|
62  | 
||
63  | 
        # this is a stack storing the depth first search into the graph.
 | 
|
64  | 
self._node_name_stack = []  | 
|
65  | 
        # at each level of 'recursion' we have to check each parent. This
 | 
|
66  | 
        # stack stores the parents we have not yet checked for the node at the 
 | 
|
67  | 
        # matching depth in _node_name_stack
 | 
|
68  | 
self._pending_parents_stack = []  | 
|
69  | 
        # this is a set of the completed nodes for fast checking whether a
 | 
|
70  | 
        # parent in a node we are processing on the stack has already been
 | 
|
71  | 
        # emitted and thus can be skipped.
 | 
|
72  | 
self._completed_node_names = set()  | 
|
73  | 
||
74  | 
def sorted(self):  | 
|
75  | 
"""Sort the graph and return as a list.  | 
|
76  | 
        
 | 
|
77  | 
        After calling this the sorter is empty and you must create a new one.
 | 
|
78  | 
        """
 | 
|
79  | 
return list(self.iter_topo_order())  | 
|
80  | 
||
81  | 
###        Useful if fiddling with this code.
 | 
|
82  | 
###        # cross check
 | 
|
83  | 
###        sorted_names = list(self.iter_topo_order())
 | 
|
84  | 
###        for index in range(len(sorted_names)):
 | 
|
85  | 
###            rev = sorted_names[index]
 | 
|
86  | 
###            for left_index in range(index):
 | 
|
87  | 
###                if rev in self.original_graph[sorted_names[left_index]]:
 | 
|
88  | 
###                    print "revision in parent list of earlier revision"
 | 
|
89  | 
###                    import pdb;pdb.set_trace()
 | 
|
90  | 
||
91  | 
def iter_topo_order(self):  | 
|
92  | 
"""Yield the nodes of a graph in a topological order.  | 
|
93  | 
    
 | 
|
94  | 
        :param graph: sequence of pairs of node_name->parent_names_list.
 | 
|
95  | 
                      i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
 | 
|
96  | 
                      For this input the following would be yielded:
 | 
|
97  | 
                      'A', 'B', 'C'
 | 
|
98  | 
        
 | 
|
99  | 
        node identifiers can be any hashable object, and are typically strings.
 | 
|
100  | 
||
101  | 
        This may raise GraphCycleError if a cycle is present in the graph.
 | 
|
102  | 
        
 | 
|
103  | 
        After finishing iteration the sorter is empty and you cannot continue
 | 
|
104  | 
        iteration.
 | 
|
105  | 
        """
 | 
|
106  | 
while self._graph:  | 
|
107  | 
            # now pick a random node in the source graph, and transfer it to the
 | 
|
108  | 
            # top of the depth first search stack.
 | 
|
109  | 
node_name, parents = self._graph.popitem()  | 
|
110  | 
self._push_node(node_name, parents)  | 
|
111  | 
while self._node_name_stack:  | 
|
112  | 
                # loop until this call completes.
 | 
|
113  | 
parents_to_visit = self._pending_parents_stack[-1]  | 
|
114  | 
                # if all parents are done, the revision is done
 | 
|
115  | 
if not parents_to_visit:  | 
|
116  | 
                    # append the revision to the topo sorted list
 | 
|
117  | 
                    # all the nodes parents have been added to the output, now
 | 
|
118  | 
                    # we can add it to the output.
 | 
|
119  | 
yield self._pop_node()  | 
|
120  | 
else:  | 
|
121  | 
while self._pending_parents_stack[-1]:  | 
|
122  | 
                        # recurse depth first into a single parent 
 | 
|
123  | 
next_node_name = self._pending_parents_stack[-1].pop()  | 
|
124  | 
if next_node_name in self._completed_node_names:  | 
|
125  | 
                            # this parent was completed by a child on the
 | 
|
126  | 
                            # call stack. skip it.
 | 
|
127  | 
                            continue
 | 
|
128  | 
                        # otherwise transfer it from the source graph into the
 | 
|
129  | 
                        # top of the current depth first search stack.
 | 
|
130  | 
try:  | 
|
131  | 
parents = self._graph.pop(next_node_name)  | 
|
132  | 
except KeyError:  | 
|
133  | 
                            # if the next node is not in the source graph it has
 | 
|
134  | 
                            # already been popped from it and placed into the
 | 
|
135  | 
                            # current search stack (but not completed or we would
 | 
|
136  | 
                            # have hit the continue 4 lines up.
 | 
|
137  | 
                            # this indicates a cycle.
 | 
|
138  | 
raise errors.GraphCycleError(self._node_name_stack)  | 
|
139  | 
self._push_node(next_node_name, parents)  | 
|
140  | 
                        # and do not continue processing parents until this 'call' 
 | 
|
141  | 
                        # has recursed.
 | 
|
142  | 
                        break
 | 
|
143  | 
||
144  | 
def _push_node(self, node_name, parents):  | 
|
145  | 
"""Add node_name to the pending node stack.  | 
|
146  | 
        
 | 
|
147  | 
        Names in this stack will get emitted into the output as they are popped
 | 
|
148  | 
        off the stack.
 | 
|
149  | 
        """
 | 
|
150  | 
||
151  | 
self._node_name_stack.append(node_name)  | 
|
152  | 
self._pending_parents_stack.append(list(parents))  | 
|
153  | 
||
154  | 
def _pop_node(self):  | 
|
155  | 
"""Pop the top node off the stack  | 
|
156  | 
||
157  | 
        The node is appended to the sorted output.
 | 
|
158  | 
        """
 | 
|
159  | 
        # we are returning from the flattened call frame:
 | 
|
160  | 
        # pop off the local variables
 | 
|
161  | 
node_name = self._node_name_stack.pop()  | 
|
162  | 
self._pending_parents_stack.pop()  | 
|
163  | 
||
164  | 
self._completed_node_names.add(node_name)  | 
|
165  | 
return node_name  |