bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Implementation of Graph algorithms when we have already loaded everything.
|
|
18 |
"""
|
|
19 |
||
20 |
import heapq |
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
21 |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
22 |
from bzrlib import ( |
23 |
revision, |
|
24 |
)
|
|
25 |
||
26 |
||
27 |
class _KnownGraphNode(object): |
|
28 |
"""Represents a single object in the known graph.""" |
|
29 |
||
30 |
__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator', |
|
|
4371.3.48
by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway. |
31 |
'gdfo', 'ancestor_of') |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
32 |
|
33 |
def __init__(self, key, parent_keys): |
|
34 |
self.key = key |
|
35 |
self.parent_keys = parent_keys |
|
36 |
self.child_keys = [] |
|
37 |
# oldest ancestor, such that no parents between here and there have >1
|
|
38 |
# child or >1 parent.
|
|
39 |
self.linear_dominator = None |
|
40 |
# Greatest distance from origin
|
|
41 |
self.gdfo = None |
|
42 |
# This will become a tuple of known heads that have this node as an
|
|
43 |
# ancestor
|
|
44 |
self.ancestor_of = None |
|
45 |
||
46 |
def __repr__(self): |
|
|
4371.3.48
by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway. |
47 |
return '%s(%s gdfo:%s par:%s child:%s %s)' % ( |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
48 |
self.__class__.__name__, self.key, self.gdfo, |
49 |
self.parent_keys, self.child_keys, |
|
|
4371.3.48
by John Arbash Meinel
Clean out some asserts, get rid of the 'dominator_distance' field which isn't used anyway. |
50 |
self.linear_dominator) |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
51 |
|
52 |
||
53 |
class KnownGraph(object): |
|
54 |
"""This is a class which assumes we already know the full graph.""" |
|
55 |
||
56 |
def __init__(self, parent_map, do_cache=True): |
|
57 |
"""Create a new KnownGraph instance. |
|
58 |
||
59 |
:param parent_map: A dictionary mapping key => parent_keys
|
|
60 |
"""
|
|
61 |
self._nodes = {} |
|
62 |
# Maps {sorted(revision_id, revision_id): heads}
|
|
63 |
self._known_heads = {} |
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
64 |
self.do_cache = do_cache |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
65 |
self._initialize_nodes(parent_map) |
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
66 |
self._find_linear_dominators() |
67 |
self._find_gdfo() |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
68 |
|
69 |
def _initialize_nodes(self, parent_map): |
|
70 |
"""Populate self._nodes. |
|
71 |
||
72 |
After this has finished, self._nodes will have an entry for every entry
|
|
73 |
in parent_map. Ghosts will have a parent_keys = None, all nodes found
|
|
74 |
will also have .child_keys populated with all known child_keys.
|
|
75 |
"""
|
|
76 |
nodes = self._nodes |
|
77 |
for key, parent_keys in parent_map.iteritems(): |
|
78 |
if key in nodes: |
|
79 |
node = nodes[key] |
|
80 |
node.parent_keys = parent_keys |
|
81 |
else: |
|
82 |
node = _KnownGraphNode(key, parent_keys) |
|
83 |
nodes[key] = node |
|
84 |
for parent_key in parent_keys: |
|
85 |
try: |
|
86 |
parent_node = nodes[parent_key] |
|
87 |
except KeyError: |
|
88 |
parent_node = _KnownGraphNode(parent_key, None) |
|
89 |
nodes[parent_key] = parent_node |
|
90 |
parent_node.child_keys.append(key) |
|
91 |
||
92 |
def _find_linear_dominators(self): |
|
93 |
"""For each node in the set, find any linear dominators. |
|
94 |
||
95 |
For any given node, the 'linear dominator' is an ancestor, such that
|
|
96 |
all parents between this node and that one have a single parent, and a
|
|
97 |
single child. So if A->B->C->D then B,C,D all have a linear dominator
|
|
|
4371.3.41
by John Arbash Meinel
Add a bit of discussion about why we would want to use linear dominators. |
98 |
of A.
|
99 |
||
100 |
There are two main benefits:
|
|
101 |
1) When walking the graph, we can jump to the nearest linear dominator,
|
|
102 |
rather than walking all of the nodes inbetween.
|
|
103 |
2) When caching heads() results, dominators give the "same" results as
|
|
104 |
their children. (If the dominator is a head, then the descendant is
|
|
105 |
a head, if the dominator is not a head, then the child isn't
|
|
106 |
either.)
|
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
107 |
"""
|
108 |
def check_node(node): |
|
109 |
if node.parent_keys is None or len(node.parent_keys) != 1: |
|
110 |
# This node is either a ghost, a tail, or has multiple parents
|
|
111 |
# It its own dominator
|
|
112 |
node.linear_dominator = node.key |
|
113 |
return None |
|
114 |
parent_node = self._nodes[node.parent_keys[0]] |
|
115 |
if len(parent_node.child_keys) > 1: |
|
116 |
# The parent has multiple children, so *this* node is the
|
|
117 |
# dominator
|
|
118 |
node.linear_dominator = node.key |
|
119 |
return None |
|
120 |
# The parent is already filled in, so add and continue
|
|
121 |
if parent_node.linear_dominator is not None: |
|
122 |
node.linear_dominator = parent_node.linear_dominator |
|
123 |
return None |
|
124 |
# We don't know this node, or its parent node, so start walking to
|
|
125 |
# next
|
|
126 |
return parent_node |
|
127 |
||
128 |
for node in self._nodes.itervalues(): |
|
129 |
# The parent is not filled in, so walk until we get somewhere
|
|
130 |
if node.linear_dominator is not None: #already done |
|
131 |
continue
|
|
132 |
next_node = check_node(node) |
|
133 |
if next_node is None: |
|
134 |
# Nothing more needs to be done
|
|
135 |
continue
|
|
136 |
stack = [] |
|
137 |
while next_node is not None: |
|
138 |
stack.append(node) |
|
139 |
node = next_node |
|
140 |
next_node = check_node(node) |
|
141 |
# The stack now contains the linear chain, and 'node' should have
|
|
142 |
# been labeled
|
|
143 |
dominator = node.linear_dominator |
|
144 |
while stack: |
|
145 |
next_node = stack.pop() |
|
146 |
next_node.linear_dominator = dominator |
|
147 |
node = next_node |
|
148 |
||
149 |
def _find_gdfo(self): |
|
150 |
def find_tails(): |
|
151 |
return [node for node in self._nodes.itervalues() |
|
152 |
if not node.parent_keys] |
|
153 |
tails = find_tails() |
|
154 |
todo = [] |
|
155 |
heappush = heapq.heappush |
|
156 |
heappop = heapq.heappop |
|
157 |
nodes = self._nodes |
|
158 |
for node in tails: |
|
159 |
node.gdfo = 1 |
|
160 |
heappush(todo, (1, node)) |
|
161 |
processed = 0 |
|
162 |
while todo: |
|
163 |
gdfo, next = heappop(todo) |
|
164 |
processed += 1 |
|
165 |
if next.gdfo is not None and gdfo < next.gdfo: |
|
166 |
# This node was reached from a longer path, we assume it was
|
|
167 |
# enqued correctly with the longer gdfo, so don't continue
|
|
168 |
# processing now
|
|
169 |
continue
|
|
170 |
next_gdfo = gdfo + 1 |
|
171 |
for child_key in next.child_keys: |
|
172 |
child_node = nodes[child_key] |
|
173 |
if child_node.gdfo is None or child_node.gdfo < next_gdfo: |
|
174 |
# Only enque children when all of their parents have been
|
|
175 |
# resolved
|
|
176 |
for parent_key in child_node.parent_keys: |
|
177 |
# We know that 'this' parent is counted
|
|
178 |
if parent_key != next.key: |
|
179 |
parent_node = nodes[parent_key] |
|
180 |
if parent_node.gdfo is None: |
|
181 |
break
|
|
182 |
else: |
|
183 |
child_node.gdfo = next_gdfo |
|
184 |
heappush(todo, (next_gdfo, child_node)) |
|
185 |
||
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
186 |
def _get_dominators_to_nodes(self, candidate_nodes): |
187 |
"""Get the reverse mapping from dominator_key => candidate_nodes. |
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
188 |
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
189 |
As a side effect, this can also remove potential candidate nodes if we
|
190 |
determine that they share a dominator.
|
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
191 |
"""
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
192 |
dom_to_node = {} |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
193 |
keys_to_remove = [] |
194 |
for node in candidate_nodes.values(): |
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
195 |
if node.linear_dominator in dom_to_node: |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
196 |
# This node already exists, resolve which node supersedes the
|
197 |
# other
|
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
198 |
other_node = dom_to_node[node.linear_dominator] |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
199 |
# There should be no way that nodes sharing a dominator could
|
200 |
# 'tie' for gdfo
|
|
201 |
if other_node.gdfo > node.gdfo: |
|
202 |
# The other node has this node as an ancestor
|
|
203 |
keys_to_remove.append(node.key) |
|
204 |
else: |
|
205 |
# Replace the other node, and set this as the new key
|
|
206 |
keys_to_remove.append(other_node.key) |
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
207 |
dom_to_node[node.linear_dominator] = node |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
208 |
else: |
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
209 |
dom_to_node[node.linear_dominator] = node |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
210 |
for key in keys_to_remove: |
211 |
candidate_nodes.pop(key) |
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
212 |
return dom_to_node |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
213 |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
214 |
def heads(self, keys): |
215 |
"""Return the heads from amongst keys. |
|
216 |
||
217 |
This is done by searching the ancestries of each key. Any key that is
|
|
218 |
reachable from another key is not returned; all the others are.
|
|
219 |
||
220 |
This operation scales with the relative depth between any two keys. If
|
|
221 |
any two keys are completely disconnected all ancestry of both sides
|
|
222 |
will be retrieved.
|
|
223 |
||
224 |
:param keys: An iterable of keys.
|
|
225 |
:return: A set of the heads. Note that as a set there is no ordering
|
|
226 |
information. Callers will need to filter their input to create
|
|
227 |
order if they need it.
|
|
228 |
"""
|
|
229 |
candidate_nodes = dict((key, self._nodes[key]) for key in keys) |
|
230 |
if revision.NULL_REVISION in candidate_nodes: |
|
231 |
# NULL_REVISION is only a head if it is the only entry
|
|
232 |
candidate_nodes.pop(revision.NULL_REVISION) |
|
233 |
if not candidate_nodes: |
|
234 |
return set([revision.NULL_REVISION]) |
|
235 |
if len(candidate_nodes) < 2: |
|
236 |
return frozenset(candidate_nodes) |
|
237 |
heads_key = frozenset(candidate_nodes) |
|
238 |
if heads_key != frozenset(keys): |
|
239 |
note('%s != %s', heads_key, frozenset(keys)) |
|
240 |
try: |
|
241 |
heads = self._known_heads[heads_key] |
|
242 |
return heads |
|
243 |
except KeyError: |
|
244 |
pass # compute it ourselves |
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
245 |
dom_to_node = self._get_dominators_to_nodes(candidate_nodes) |
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
246 |
if len(candidate_nodes) < 2: |
247 |
# We shrunk candidate_nodes and determined a new head
|
|
248 |
return frozenset(candidate_nodes) |
|
249 |
dom_heads_key = None |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
250 |
# Check the linear dominators of these keys, to see if we already
|
251 |
# know the heads answer
|
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
252 |
dom_heads_key = frozenset([node.linear_dominator |
253 |
for node in candidate_nodes.itervalues()]) |
|
254 |
if dom_heads_key in self._known_heads: |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
255 |
# map back into the original keys
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
256 |
heads = self._known_heads[dom_heads_key] |
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
257 |
heads = frozenset([dom_to_node[key].key for key in heads]) |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
258 |
return heads |
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
259 |
heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node) |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
260 |
if self.do_cache: |
261 |
self._known_heads[heads_key] = heads |
|
262 |
# Cache the dominator heads
|
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
263 |
if dom_heads_key is not None: |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
264 |
dom_heads = frozenset([candidate_nodes[key].linear_dominator |
265 |
for key in heads]) |
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
266 |
self._known_heads[dom_heads_key] = dom_heads |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
267 |
return heads |
268 |
||
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
269 |
def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node): |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
270 |
queue = [] |
271 |
to_cleanup = [] |
|
272 |
to_cleanup_append = to_cleanup.append |
|
273 |
for node in candidate_nodes.itervalues(): |
|
274 |
node.ancestor_of = (node.key,) |
|
275 |
queue.append((-node.gdfo, node)) |
|
276 |
to_cleanup_append(node) |
|
277 |
heapq.heapify(queue) |
|
278 |
# These are nodes that we determined are 'common' that we are no longer
|
|
279 |
# walking
|
|
280 |
# Now we walk nodes until all nodes that are being walked are 'common'
|
|
281 |
num_candidates = len(candidate_nodes) |
|
282 |
nodes = self._nodes |
|
283 |
heappop = heapq.heappop |
|
284 |
heappush = heapq.heappush |
|
285 |
while queue and len(candidate_nodes) > 1: |
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
286 |
_, node = heappop(queue) |
287 |
next_ancestor_of = node.ancestor_of |
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
288 |
if len(next_ancestor_of) == num_candidates: |
289 |
# This node is now considered 'common'
|
|
290 |
# Make sure all parent nodes are marked as such
|
|
291 |
for parent_key in node.parent_keys: |
|
292 |
parent_node = nodes[parent_key] |
|
293 |
if parent_node.ancestor_of is not None: |
|
294 |
parent_node.ancestor_of = next_ancestor_of |
|
295 |
if node.linear_dominator != node.key: |
|
296 |
parent_node = nodes[node.linear_dominator] |
|
297 |
if parent_node.ancestor_of is not None: |
|
298 |
parent_node.ancestor_of = next_ancestor_of |
|
299 |
continue
|
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
300 |
if node.parent_keys is None: |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
301 |
# This is a ghost
|
302 |
continue
|
|
303 |
# Now project the current nodes ancestor list to the parent nodes,
|
|
304 |
# and queue them up to be walked
|
|
305 |
# Note: using linear_dominator speeds things up quite a bit
|
|
306 |
# enough that we actually start to be slightly faster
|
|
307 |
# than the default heads() implementation
|
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
308 |
if node.linear_dominator != node.key: |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
309 |
# We are at the tip of a long linear region
|
310 |
# We know that there is nothing between here and the tail
|
|
311 |
# that is interesting, so skip to the end
|
|
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
312 |
parent_keys = [node.linear_dominator] |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
313 |
else: |
|
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
314 |
parent_keys = node.parent_keys |
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
315 |
for parent_key in parent_keys: |
316 |
if parent_key in candidate_nodes: |
|
317 |
candidate_nodes.pop(parent_key) |
|
318 |
if len(candidate_nodes) <= 1: |
|
319 |
break
|
|
|
4371.3.40
by John Arbash Meinel
Fixes for linear_nodes in the pyrex version. |
320 |
elif parent_key in dom_to_node: |
321 |
orig_node = dom_to_node[parent_key] |
|
322 |
if orig_node is not node: |
|
323 |
if orig_node.key in candidate_nodes: |
|
324 |
candidate_nodes.pop(orig_node.key) |
|
|
4371.3.39
by John Arbash Meinel
Implement the fix for the python version. |
325 |
if len(candidate_nodes) <= 1: |
326 |
break
|
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
327 |
parent_node = nodes[parent_key] |
328 |
ancestor_of = parent_node.ancestor_of |
|
329 |
if ancestor_of is None: |
|
330 |
# This node hasn't been walked yet
|
|
331 |
parent_node.ancestor_of = next_ancestor_of |
|
332 |
# Enqueue this node
|
|
333 |
heappush(queue, (-parent_node.gdfo, parent_node)) |
|
334 |
to_cleanup_append(parent_node) |
|
335 |
elif ancestor_of != next_ancestor_of: |
|
336 |
# Combine to get the full set of parents
|
|
337 |
all_ancestors = set(ancestor_of) |
|
338 |
all_ancestors.update(next_ancestor_of) |
|
339 |
parent_node.ancestor_of = tuple(sorted(all_ancestors)) |
|
340 |
def cleanup(): |
|
341 |
for node in to_cleanup: |
|
342 |
node.ancestor_of = None |
|
343 |
cleanup() |
|
344 |
return frozenset(candidate_nodes) |