bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
1  | 
# Copyright (C) 2009 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
|
16  | 
||
17  | 
"""Implementation of Graph algorithms when we have already loaded everything.
 | 
|
18  | 
"""
 | 
|
19  | 
||
20  | 
from bzrlib import (  | 
|
| 
4593.5.30
by John Arbash Meinel
 Move the topo_sort implementation into KnownGraph, rather than calling back to it.  | 
21  | 
errors,  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
22  | 
revision,  | 
23  | 
    )
 | 
|
24  | 
||
25  | 
||
26  | 
class _KnownGraphNode(object):  | 
|
27  | 
"""Represents a single object in the known graph."""  | 
|
28  | 
||
| 
4371.4.10
by Vincent Ladeuil
 Cleanup.  | 
29  | 
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
30  | 
|
31  | 
def __init__(self, key, parent_keys):  | 
|
32  | 
self.key = key  | 
|
33  | 
self.parent_keys = parent_keys  | 
|
34  | 
self.child_keys = []  | 
|
35  | 
        # Greatest distance from origin
 | 
|
36  | 
self.gdfo = None  | 
|
37  | 
||
38  | 
def __repr__(self):  | 
|
| 
4371.4.10
by Vincent Ladeuil
 Cleanup.  | 
39  | 
return '%s(%s gdfo:%s par:%s child:%s)' % (  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
40  | 
self.__class__.__name__, self.key, self.gdfo,  | 
| 
4371.4.10
by Vincent Ladeuil
 Cleanup.  | 
41  | 
self.parent_keys, self.child_keys)  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
42  | 
|
43  | 
||
| 
4593.5.34
by John Arbash Meinel
 Change the KnownGraph.merge_sort api.  | 
44  | 
class _MergeSortNode(object):  | 
45  | 
"""Information about a specific node in the merge graph."""  | 
|
46  | 
||
47  | 
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')  | 
|
48  | 
||
49  | 
def __init__(self, key, merge_depth, revno, end_of_merge):  | 
|
50  | 
self.key = key  | 
|
51  | 
self.merge_depth = merge_depth  | 
|
52  | 
self.revno = revno  | 
|
53  | 
self.end_of_merge = end_of_merge  | 
|
54  | 
||
55  | 
||
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
56  | 
class KnownGraph(object):  | 
57  | 
"""This is a class which assumes we already know the full graph."""  | 
|
58  | 
||
59  | 
def __init__(self, parent_map, do_cache=True):  | 
|
60  | 
"""Create a new KnownGraph instance.  | 
|
61  | 
||
62  | 
        :param parent_map: A dictionary mapping key => parent_keys
 | 
|
63  | 
        """
 | 
|
64  | 
self._nodes = {}  | 
|
65  | 
        # Maps {sorted(revision_id, revision_id): heads}
 | 
|
66  | 
self._known_heads = {}  | 
|
| 
4371.3.19
by John Arbash Meinel
 Get an initial pyrex implementation.  | 
67  | 
self.do_cache = do_cache  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
68  | 
self._initialize_nodes(parent_map)  | 
| 
4371.3.19
by John Arbash Meinel
 Get an initial pyrex implementation.  | 
69  | 
self._find_gdfo()  | 
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
70  | 
|
71  | 
def _initialize_nodes(self, parent_map):  | 
|
72  | 
"""Populate self._nodes.  | 
|
73  | 
||
| 
4371.4.9
by Vincent Ladeuil
 Simplify gdfo computing by finding tails when at graph build time.  | 
74  | 
        After this has finished:
 | 
75  | 
        - self._nodes will have an entry for every entry in parent_map.
 | 
|
76  | 
        - ghosts will have a parent_keys = None,
 | 
|
77  | 
        - all nodes found will also have .child_keys populated with all known
 | 
|
78  | 
          child_keys,
 | 
|
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
79  | 
        """
 | 
80  | 
nodes = self._nodes  | 
|
81  | 
for key, parent_keys in parent_map.iteritems():  | 
|
82  | 
if key in nodes:  | 
|
83  | 
node = nodes[key]  | 
|
84  | 
node.parent_keys = parent_keys  | 
|
85  | 
else:  | 
|
86  | 
node = _KnownGraphNode(key, parent_keys)  | 
|
87  | 
nodes[key] = node  | 
|
88  | 
for parent_key in parent_keys:  | 
|
89  | 
try:  | 
|
90  | 
parent_node = nodes[parent_key]  | 
|
91  | 
except KeyError:  | 
|
92  | 
parent_node = _KnownGraphNode(parent_key, None)  | 
|
93  | 
nodes[parent_key] = parent_node  | 
|
94  | 
parent_node.child_keys.append(key)  | 
|
95  | 
||
| 
4454.3.78
by John Arbash Meinel
 Found a bug in the pure-python KnownGraph code.  | 
96  | 
def _find_tails(self):  | 
97  | 
return [node for node in self._nodes.itervalues()  | 
|
98  | 
if not node.parent_keys]  | 
|
99  | 
||
| 
4641.5.4
by John Arbash Meinel
 Implement a basic algorithm for the pure-python KnownGraph code.  | 
100  | 
def _find_tips(self):  | 
101  | 
return [node for node in self._nodes.itervalues()  | 
|
102  | 
if not node.child_keys]  | 
|
103  | 
||
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
104  | 
def _find_gdfo(self):  | 
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
105  | 
nodes = self._nodes  | 
| 
4371.4.10
by Vincent Ladeuil
 Cleanup.  | 
106  | 
known_parent_gdfos = {}  | 
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
107  | 
pending = []  | 
108  | 
||
| 
4454.3.78
by John Arbash Meinel
 Found a bug in the pure-python KnownGraph code.  | 
109  | 
for node in self._find_tails():  | 
| 
4371.4.9
by Vincent Ladeuil
 Simplify gdfo computing by finding tails when at graph build time.  | 
110  | 
node.gdfo = 1  | 
111  | 
pending.append(node)  | 
|
112  | 
||
| 
4371.4.8
by Vincent Ladeuil
 Replace the existing KnownGraph._find_gdfo.  | 
113  | 
while pending:  | 
114  | 
node = pending.pop()  | 
|
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
115  | 
for child_key in node.child_keys:  | 
116  | 
child = nodes[child_key]  | 
|
| 
4371.4.19
by John Arbash Meinel
 Update the python code to do the same checking around known_parent_gdfos being present.  | 
117  | 
if child_key in known_parent_gdfos:  | 
118  | 
known_gdfo = known_parent_gdfos[child_key] + 1  | 
|
119  | 
present = True  | 
|
120  | 
else:  | 
|
121  | 
known_gdfo = 1  | 
|
122  | 
present = False  | 
|
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
123  | 
if child.gdfo is None or node.gdfo + 1 > child.gdfo:  | 
124  | 
child.gdfo = node.gdfo + 1  | 
|
| 
4371.4.19
by John Arbash Meinel
 Update the python code to do the same checking around known_parent_gdfos being present.  | 
125  | 
if known_gdfo == len(child.parent_keys):  | 
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
126  | 
                    # We are the last parent updating that node, we can
 | 
127  | 
                    # continue from there
 | 
|
| 
4371.4.2
by Vincent Ladeuil
 Same gdfo processing, without recursion.  | 
128  | 
pending.append(child)  | 
| 
4371.4.19
by John Arbash Meinel
 Update the python code to do the same checking around known_parent_gdfos being present.  | 
129  | 
if present:  | 
130  | 
del known_parent_gdfos[child_key]  | 
|
131  | 
else:  | 
|
132  | 
                    # Update known_parent_gdfos for a key we couldn't process
 | 
|
133  | 
known_parent_gdfos[child_key] = known_gdfo  | 
|
| 
4371.4.1
by Vincent Ladeuil
 A new recursive gdfo processing method.  | 
134  | 
|
| 
4371.3.18
by John Arbash Meinel
 Change VF.annotate to use the new KnownGraph code.  | 
135  | 
def heads(self, keys):  | 
136  | 
"""Return the heads from amongst keys.  | 
|
137  | 
||
138  | 
        This is done by searching the ancestries of each key.  Any key that is
 | 
|
139  | 
        reachable from another key is not returned; all the others are.
 | 
|
140  | 
||
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
141  | 
        This operation scales with the relative depth between any two keys. It
 | 
142  | 
        uses gdfo to avoid walking all ancestry.
 | 
|
143  | 
||
144  | 
        :param keys: An iterable of keys.
 | 
|
145  | 
        :return: A set of the heads. Note that as a set there is no ordering
 | 
|
146  | 
            information. Callers will need to filter their input to create
 | 
|
147  | 
            order if they need it.
 | 
|
148  | 
        """
 | 
|
149  | 
candidate_nodes = dict((key, self._nodes[key]) for key in keys)  | 
|
150  | 
if revision.NULL_REVISION in candidate_nodes:  | 
|
151  | 
            # NULL_REVISION is only a head if it is the only entry
 | 
|
152  | 
candidate_nodes.pop(revision.NULL_REVISION)  | 
|
153  | 
if not candidate_nodes:  | 
|
154  | 
return frozenset([revision.NULL_REVISION])  | 
|
155  | 
if len(candidate_nodes) < 2:  | 
|
156  | 
            # No or only one candidate
 | 
|
157  | 
return frozenset(candidate_nodes)  | 
|
158  | 
heads_key = frozenset(candidate_nodes)  | 
|
159  | 
        # Do we have a cached result ?
 | 
|
160  | 
try:  | 
|
161  | 
heads = self._known_heads[heads_key]  | 
|
162  | 
return heads  | 
|
163  | 
except KeyError:  | 
|
164  | 
            pass
 | 
|
165  | 
        # Let's compute the heads
 | 
|
| 
4371.4.4
by Vincent Ladeuil
 Feedback from John, 'seen' is a set().  | 
166  | 
seen = set()  | 
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
167  | 
pending = []  | 
168  | 
min_gdfo = None  | 
|
169  | 
for node in candidate_nodes.values():  | 
|
| 
4371.4.7
by Vincent Ladeuil
 Simple Pyrex version for the new heads() method.  | 
170  | 
if node.parent_keys:  | 
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
171  | 
pending.extend(node.parent_keys)  | 
172  | 
if min_gdfo is None or node.gdfo < min_gdfo:  | 
|
173  | 
min_gdfo = node.gdfo  | 
|
174  | 
nodes = self._nodes  | 
|
175  | 
while pending:  | 
|
176  | 
node_key = pending.pop()  | 
|
177  | 
if node_key in seen:  | 
|
178  | 
                # node already appears in some ancestry
 | 
|
179  | 
                continue
 | 
|
| 
4371.4.4
by Vincent Ladeuil
 Feedback from John, 'seen' is a set().  | 
180  | 
seen.add(node_key)  | 
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
181  | 
node = nodes[node_key]  | 
182  | 
if node.gdfo <= min_gdfo:  | 
|
183  | 
                continue
 | 
|
| 
4371.4.7
by Vincent Ladeuil
 Simple Pyrex version for the new heads() method.  | 
184  | 
if node.parent_keys:  | 
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
185  | 
pending.extend(node.parent_keys)  | 
| 
4371.4.4
by Vincent Ladeuil
 Feedback from John, 'seen' is a set().  | 
186  | 
heads = heads_key.difference(seen)  | 
| 
4371.4.3
by Vincent Ladeuil
 A new heads() method based on gdfo.  | 
187  | 
if self.do_cache:  | 
188  | 
self._known_heads[heads_key] = heads  | 
|
189  | 
return heads  | 
|
190  | 
||
| 
4593.5.2
by John Arbash Meinel
 Implement a very basic version of KnownGraph.topo_sort that just  | 
191  | 
def topo_sort(self):  | 
| 
4593.5.30
by John Arbash Meinel
 Move the topo_sort implementation into KnownGraph, rather than calling back to it.  | 
192  | 
"""Return the nodes in topological order.  | 
193  | 
||
194  | 
        All parents must occur before all children.
 | 
|
195  | 
        """
 | 
|
196  | 
for node in self._nodes.itervalues():  | 
|
197  | 
if node.gdfo is None:  | 
|
198  | 
raise errors.GraphCycleError(self._nodes)  | 
|
199  | 
pending = self._find_tails()  | 
|
200  | 
pending_pop = pending.pop  | 
|
201  | 
pending_append = pending.append  | 
|
202  | 
||
203  | 
topo_order = []  | 
|
204  | 
topo_order_append = topo_order.append  | 
|
205  | 
||
206  | 
num_seen_parents = dict.fromkeys(self._nodes, 0)  | 
|
207  | 
while pending:  | 
|
208  | 
node = pending_pop()  | 
|
209  | 
if node.parent_keys is not None:  | 
|
210  | 
                # We don't include ghost parents
 | 
|
211  | 
topo_order_append(node.key)  | 
|
212  | 
for child_key in node.child_keys:  | 
|
213  | 
child_node = self._nodes[child_key]  | 
|
214  | 
seen_parents = num_seen_parents[child_key] + 1  | 
|
215  | 
if seen_parents == len(child_node.parent_keys):  | 
|
216  | 
                    # All parents have been processed, enqueue this child
 | 
|
217  | 
pending_append(child_node)  | 
|
218  | 
                    # This has been queued up, stop tracking it
 | 
|
219  | 
del num_seen_parents[child_key]  | 
|
220  | 
else:  | 
|
221  | 
num_seen_parents[child_key] = seen_parents  | 
|
222  | 
        # We started from the parents, so we don't need to do anymore work
 | 
|
223  | 
return topo_order  | 
|
| 
4593.5.6
by John Arbash Meinel
 Create the crude merge_sort implementation that just thunks over to the old implementation.  | 
224  | 
|
| 
4641.5.2
by John Arbash Meinel
 Get ready to move the tests into KnownGraph implementation tests.  | 
225  | 
def gc_sort(self):  | 
226  | 
"""Return a reverse topological ordering which is 'stable'.  | 
|
227  | 
||
228  | 
        There are a few constraints:
 | 
|
229  | 
          1) Reverse topological (all children before all parents)
 | 
|
230  | 
          2) Grouped by prefix
 | 
|
231  | 
          3) 'stable' sorting, so that we get the same result, independent of
 | 
|
232  | 
             machine, or extra data.
 | 
|
233  | 
        To do this, we use the same basic algorithm as topo_sort, but when we
 | 
|
234  | 
        aren't sure what node to access next, we sort them lexicographically.
 | 
|
235  | 
        """
 | 
|
| 
4641.5.4
by John Arbash Meinel
 Implement a basic algorithm for the pure-python KnownGraph code.  | 
236  | 
tips = self._find_tips()  | 
237  | 
        # Split the tips based on prefix
 | 
|
238  | 
prefix_tips = {}  | 
|
239  | 
for node in tips:  | 
|
240  | 
if node.key.__class__ is str or len(node.key) == 1:  | 
|
241  | 
prefix = ''  | 
|
242  | 
else:  | 
|
243  | 
prefix = node.key[0]  | 
|
244  | 
prefix_tips.setdefault(prefix, []).append(node)  | 
|
245  | 
||
246  | 
num_seen_children = dict.fromkeys(self._nodes, 0)  | 
|
247  | 
||
248  | 
result = []  | 
|
249  | 
for prefix in sorted(prefix_tips):  | 
|
250  | 
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,  | 
|
251  | 
reverse=True)  | 
|
252  | 
while pending:  | 
|
253  | 
node = pending.pop()  | 
|
| 
4641.5.6
by John Arbash Meinel
 Add support for skipping ghost nodes.  | 
254  | 
if node.parent_keys is None:  | 
255  | 
                    # Ghost node, skip it
 | 
|
256  | 
                    continue
 | 
|
| 
4641.5.4
by John Arbash Meinel
 Implement a basic algorithm for the pure-python KnownGraph code.  | 
257  | 
result.append(node.key)  | 
| 
4641.5.5
by John Arbash Meinel
 Implement initial gc_sort in pyrex.  | 
258  | 
for parent_key in sorted(node.parent_keys, reverse=True):  | 
| 
4641.5.4
by John Arbash Meinel
 Implement a basic algorithm for the pure-python KnownGraph code.  | 
259  | 
parent_node = self._nodes[parent_key]  | 
260  | 
seen_children = num_seen_children[parent_key] + 1  | 
|
261  | 
if seen_children == len(parent_node.child_keys):  | 
|
262  | 
                        # All children have been processed, enqueue this parent
 | 
|
263  | 
pending.append(parent_node)  | 
|
264  | 
                        # This has been queued up, stop tracking it
 | 
|
265  | 
del num_seen_children[parent_key]  | 
|
266  | 
else:  | 
|
| 
4641.5.5
by John Arbash Meinel
 Implement initial gc_sort in pyrex.  | 
267  | 
num_seen_children[parent_key] = seen_children  | 
| 
4641.5.4
by John Arbash Meinel
 Implement a basic algorithm for the pure-python KnownGraph code.  | 
268  | 
return result  | 
| 
4641.5.2
by John Arbash Meinel
 Get ready to move the tests into KnownGraph implementation tests.  | 
269  | 
|
| 
4593.5.6
by John Arbash Meinel
 Create the crude merge_sort implementation that just thunks over to the old implementation.  | 
270  | 
def merge_sort(self, tip_key):  | 
271  | 
"""Compute the merge sorted graph output."""  | 
|
| 
4593.5.33
by John Arbash Meinel
 Late import tsort because it causes an import cycle.  | 
272  | 
from bzrlib import tsort  | 
| 
4593.5.6
by John Arbash Meinel
 Create the crude merge_sort implementation that just thunks over to the old implementation.  | 
273  | 
as_parent_map = dict((node.key, node.parent_keys)  | 
| 
4593.5.25
by John Arbash Meinel
 Add tests that we detect GraphCycleError  | 
274  | 
for node in self._nodes.itervalues()  | 
275  | 
if node.parent_keys is not None)  | 
|
| 
4593.5.6
by John Arbash Meinel
 Create the crude merge_sort implementation that just thunks over to the old implementation.  | 
276  | 
        # We intentionally always generate revnos and never force the
 | 
277  | 
        # mainline_revisions
 | 
|
| 
4593.5.32
by John Arbash Meinel
 With the new api, we don't return 'sequence_number' anymore.  | 
278  | 
        # Strip the sequence_number that merge_sort generates
 | 
| 
4593.5.34
by John Arbash Meinel
 Change the KnownGraph.merge_sort api.  | 
279  | 
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)  | 
280  | 
for _, key, merge_depth, revno, end_of_merge  | 
|
281  | 
in tsort.merge_sort(as_parent_map, tip_key,  | 
|
282  | 
mainline_revisions=None,  | 
|
283  | 
generate_revno=True)]  | 
|
| 
4648.2.1
by Gary van der Merwe
 Add get_parent_keys, and get_child_keys to KnownGraph.  | 
284  | 
|
285  | 
def get_parent_keys(self, key):  | 
|
286  | 
"""Get the parents for a key  | 
|
287  | 
        
 | 
|
288  | 
        Returns a list containg the parents keys. If the key is a ghost,
 | 
|
289  | 
        None is returned. A KeyError will be raised if the key is not in
 | 
|
290  | 
        the graph.
 | 
|
291  | 
        
 | 
|
292  | 
        :param keys: Key to check (eg revision_id)
 | 
|
293  | 
        :return: A list of parents
 | 
|
294  | 
        """
 | 
|
295  | 
return self._nodes[key].parent_keys  | 
|
296  | 
||
297  | 
def get_child_keys(self, key):  | 
|
298  | 
"""Get the children for a key  | 
|
299  | 
        
 | 
|
300  | 
        Returns a list containg the children keys. A KeyError will be raised
 | 
|
301  | 
        if the key is not in the graph.
 | 
|
302  | 
        
 | 
|
303  | 
        :param keys: Key to check (eg revision_id)
 | 
|
304  | 
        :return: A list of children
 | 
|
305  | 
        """
 | 
|
306  | 
return self._nodes[key].child_keys  |