1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
from __future__ import absolute_import
33
class _KnownGraphNode(object):
34
"""Represents a single object in the known graph."""
36
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
38
def __init__(self, key, parent_keys):
40
self.parent_keys = parent_keys
42
# Greatest distance from origin
46
return '%s(%s gdfo:%s par:%s child:%s)' % (
47
self.__class__.__name__, self.key, self.gdfo,
48
self.parent_keys, self.child_keys)
51
class _MergeSortNode(object):
52
"""Information about a specific node in the merge graph."""
54
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
56
def __init__(self, key, merge_depth, revno, end_of_merge):
58
self.merge_depth = merge_depth
60
self.end_of_merge = end_of_merge
63
class KnownGraph(object):
64
"""This is a class which assumes we already know the full graph."""
66
def __init__(self, parent_map, do_cache=True):
67
"""Create a new KnownGraph instance.
69
:param parent_map: A dictionary mapping key => parent_keys
72
# Maps {frozenset(revision_id, revision_id): heads}
73
self._known_heads = {}
74
self.do_cache = do_cache
75
self._initialize_nodes(parent_map)
78
def _initialize_nodes(self, parent_map):
79
"""Populate self._nodes.
81
After this has finished:
82
- self._nodes will have an entry for every entry in parent_map.
83
- ghosts will have a parent_keys = None,
84
- all nodes found will also have .child_keys populated with all known
88
for key, parent_keys in viewitems(parent_map):
91
node.parent_keys = parent_keys
93
node = _KnownGraphNode(key, parent_keys)
95
for parent_key in parent_keys:
97
parent_node = nodes[parent_key]
99
parent_node = _KnownGraphNode(parent_key, None)
100
nodes[parent_key] = parent_node
101
parent_node.child_keys.append(key)
103
def _find_tails(self):
104
return [node for node in viewvalues(self._nodes)
105
if not node.parent_keys]
107
def _find_tips(self):
108
return [node for node in viewvalues(self._nodes)
109
if not node.child_keys]
111
def _find_gdfo(self):
113
known_parent_gdfos = {}
116
for node in self._find_tails():
122
for child_key in node.child_keys:
123
child = nodes[child_key]
124
if child_key in known_parent_gdfos:
125
known_gdfo = known_parent_gdfos[child_key] + 1
130
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
131
child.gdfo = node.gdfo + 1
132
if known_gdfo == len(child.parent_keys):
133
# We are the last parent updating that node, we can
134
# continue from there
135
pending.append(child)
137
del known_parent_gdfos[child_key]
139
# Update known_parent_gdfos for a key we couldn't process
140
known_parent_gdfos[child_key] = known_gdfo
142
def add_node(self, key, parent_keys):
143
"""Add a new node to the graph.
145
If this fills in a ghost, then the gdfos of all children will be
148
:param key: The node being added. If this is a duplicate, this is a
150
:param parent_keys: The parents of the given node.
151
:return: None (should we return if this was a ghost, etc?)
156
if node.parent_keys is None:
157
node.parent_keys = parent_keys
158
# A ghost is being added, we can no-longer trust the heads
160
self._known_heads.clear()
162
# Make sure we compare a list to a list, as tuple != list.
163
parent_keys = list(parent_keys)
164
existing_parent_keys = list(node.parent_keys)
165
if parent_keys == existing_parent_keys:
166
return # Identical content
168
raise ValueError('Parent key mismatch, existing node %s'
169
' has parents of %s not %s'
170
% (key, existing_parent_keys, parent_keys))
172
node = _KnownGraphNode(key, parent_keys)
175
for parent_key in parent_keys:
177
parent_node = nodes[parent_key]
179
parent_node = _KnownGraphNode(parent_key, None)
180
# Ghosts and roots have gdfo 1
182
nodes[parent_key] = parent_node
183
if parent_gdfo < parent_node.gdfo:
184
parent_gdfo = parent_node.gdfo
185
parent_node.child_keys.append(key)
186
node.gdfo = parent_gdfo + 1
187
# Now fill the gdfo to all children
188
# Note that this loop is slightly inefficient, in that we may visit the
189
# same child (and its decendents) more than once, however, it is
190
# 'efficient' in that we only walk to nodes that would be updated,
191
# rather than all nodes
192
# We use a deque rather than a simple list stack, to go for BFD rather
193
# than DFD. So that if a longer path is possible, we walk it before we
194
# get to the final child
195
pending = collections.deque([node])
197
node = pending.popleft()
198
next_gdfo = node.gdfo + 1
199
for child_key in node.child_keys:
200
child = nodes[child_key]
201
if child.gdfo < next_gdfo:
202
# This child is being updated, we need to check its
204
child.gdfo = next_gdfo
205
pending.append(child)
207
def heads(self, keys):
208
"""Return the heads from amongst keys.
210
This is done by searching the ancestries of each key. Any key that is
211
reachable from another key is not returned; all the others are.
213
This operation scales with the relative depth between any two keys. It
214
uses gdfo to avoid walking all ancestry.
216
:param keys: An iterable of keys.
217
:return: A set of the heads. Note that as a set there is no ordering
218
information. Callers will need to filter their input to create
219
order if they need it.
221
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
222
if revision.NULL_REVISION in candidate_nodes:
223
# NULL_REVISION is only a head if it is the only entry
224
candidate_nodes.pop(revision.NULL_REVISION)
225
if not candidate_nodes:
226
return frozenset([revision.NULL_REVISION])
227
if len(candidate_nodes) < 2:
228
# No or only one candidate
229
return frozenset(candidate_nodes)
230
heads_key = frozenset(candidate_nodes)
231
# Do we have a cached result ?
233
heads = self._known_heads[heads_key]
237
# Let's compute the heads
241
for node in viewvalues(candidate_nodes):
243
pending.extend(node.parent_keys)
244
if min_gdfo is None or node.gdfo < min_gdfo:
248
node_key = pending.pop()
250
# node already appears in some ancestry
253
node = nodes[node_key]
254
if node.gdfo <= min_gdfo:
257
pending.extend(node.parent_keys)
258
heads = heads_key.difference(seen)
260
self._known_heads[heads_key] = heads
264
"""Return the nodes in topological order.
266
All parents must occur before all children.
268
for node in viewvalues(self._nodes):
269
if node.gdfo is None:
270
raise errors.GraphCycleError(self._nodes)
271
pending = self._find_tails()
272
pending_pop = pending.pop
273
pending_append = pending.append
276
topo_order_append = topo_order.append
278
num_seen_parents = dict.fromkeys(self._nodes, 0)
281
if node.parent_keys is not None:
282
# We don't include ghost parents
283
topo_order_append(node.key)
284
for child_key in node.child_keys:
285
child_node = self._nodes[child_key]
286
seen_parents = num_seen_parents[child_key] + 1
287
if seen_parents == len(child_node.parent_keys):
288
# All parents have been processed, enqueue this child
289
pending_append(child_node)
290
# This has been queued up, stop tracking it
291
del num_seen_parents[child_key]
293
num_seen_parents[child_key] = seen_parents
294
# We started from the parents, so we don't need to do anymore work
298
"""Return a reverse topological ordering which is 'stable'.
300
There are a few constraints:
301
1) Reverse topological (all children before all parents)
303
3) 'stable' sorting, so that we get the same result, independent of
304
machine, or extra data.
305
To do this, we use the same basic algorithm as topo_sort, but when we
306
aren't sure what node to access next, we sort them lexicographically.
308
tips = self._find_tips()
309
# Split the tips based on prefix
312
if node.key.__class__ is str or len(node.key) == 1:
316
prefix_tips.setdefault(prefix, []).append(node)
318
num_seen_children = dict.fromkeys(self._nodes, 0)
321
for prefix in sorted(prefix_tips):
322
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
326
if node.parent_keys is None:
327
# Ghost node, skip it
329
result.append(node.key)
330
for parent_key in sorted(node.parent_keys, reverse=True):
331
parent_node = self._nodes[parent_key]
332
seen_children = num_seen_children[parent_key] + 1
333
if seen_children == len(parent_node.child_keys):
334
# All children have been processed, enqueue this parent
335
pending.append(parent_node)
336
# This has been queued up, stop tracking it
337
del num_seen_children[parent_key]
339
num_seen_children[parent_key] = seen_children
342
def merge_sort(self, tip_key):
343
"""Compute the merge sorted graph output."""
344
from breezy import tsort
345
as_parent_map = dict((node.key, node.parent_keys)
346
for node in viewvalues(self._nodes)
347
if node.parent_keys is not None)
348
# We intentionally always generate revnos and never force the
350
# Strip the sequence_number that merge_sort generates
351
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
352
for _, key, merge_depth, revno, end_of_merge
353
in tsort.merge_sort(as_parent_map, tip_key,
354
mainline_revisions=None,
355
generate_revno=True)]
357
def get_parent_keys(self, key):
358
"""Get the parents for a key
360
Returns a list containg the parents keys. If the key is a ghost,
361
None is returned. A KeyError will be raised if the key is not in
364
:param keys: Key to check (eg revision_id)
365
:return: A list of parents
367
return self._nodes[key].parent_keys
369
def get_child_keys(self, key):
370
"""Get the children for a key
372
Returns a list containg the children keys. A KeyError will be raised
373
if the key is not in the graph.
375
:param keys: Key to check (eg revision_id)
376
:return: A list of children
378
return self._nodes[key].child_keys