1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
from __future__ import absolute_import
33
class _KnownGraphNode(object):
34
"""Represents a single object in the known graph."""
36
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
38
def __init__(self, key, parent_keys):
40
self.parent_keys = parent_keys
42
# Greatest distance from origin
46
return '%s(%s gdfo:%s par:%s child:%s)' % (
47
self.__class__.__name__, self.key, self.gdfo,
48
self.parent_keys, self.child_keys)
51
class _MergeSortNode(object):
52
"""Information about a specific node in the merge graph."""
54
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
56
def __init__(self, key, merge_depth, revno, end_of_merge):
58
self.merge_depth = merge_depth
60
self.end_of_merge = end_of_merge
63
class KnownGraph(object):
64
"""This is a class which assumes we already know the full graph."""
66
def __init__(self, parent_map, do_cache=True):
67
"""Create a new KnownGraph instance.
69
:param parent_map: A dictionary mapping key => parent_keys
72
# Maps {frozenset(revision_id, revision_id): heads}
73
self._known_heads = {}
74
self.do_cache = do_cache
75
self._initialize_nodes(parent_map)
78
def _initialize_nodes(self, parent_map):
79
"""Populate self._nodes.
81
After this has finished:
82
- self._nodes will have an entry for every entry in parent_map.
83
- ghosts will have a parent_keys = None,
84
- all nodes found will also have .child_keys populated with all known
88
for key, parent_keys in viewitems(parent_map):
91
node.parent_keys = parent_keys
93
node = _KnownGraphNode(key, parent_keys)
95
for parent_key in parent_keys:
97
parent_node = nodes[parent_key]
99
parent_node = _KnownGraphNode(parent_key, None)
100
nodes[parent_key] = parent_node
101
parent_node.child_keys.append(key)
103
def _find_tails(self):
104
return [node for node in viewvalues(self._nodes)
105
if not node.parent_keys]
107
def _find_tips(self):
108
return [node for node in viewvalues(self._nodes)
109
if not node.child_keys]
111
def _find_gdfo(self):
113
known_parent_gdfos = {}
116
for node in self._find_tails():
122
for child_key in node.child_keys:
123
child = nodes[child_key]
124
if child_key in known_parent_gdfos:
125
known_gdfo = known_parent_gdfos[child_key] + 1
130
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
131
child.gdfo = node.gdfo + 1
132
if known_gdfo == len(child.parent_keys):
133
# We are the last parent updating that node, we can
134
# continue from there
135
pending.append(child)
137
del known_parent_gdfos[child_key]
139
# Update known_parent_gdfos for a key we couldn't process
140
known_parent_gdfos[child_key] = known_gdfo
142
def add_node(self, key, parent_keys):
143
"""Add a new node to the graph.
145
If this fills in a ghost, then the gdfos of all children will be
148
:param key: The node being added. If this is a duplicate, this is a
150
:param parent_keys: The parents of the given node.
151
:return: None (should we return if this was a ghost, etc?)
156
if node.parent_keys is None:
157
node.parent_keys = parent_keys
158
# A ghost is being added, we can no-longer trust the heads
160
self._known_heads.clear()
162
# Make sure we compare a list to a list, as tuple != list.
163
parent_keys = list(parent_keys)
164
existing_parent_keys = list(node.parent_keys)
165
if parent_keys == existing_parent_keys:
166
return # Identical content
169
'Parent key mismatch, existing node %s'
170
' has parents of %s not %s'
171
% (key, existing_parent_keys, parent_keys))
173
node = _KnownGraphNode(key, parent_keys)
176
for parent_key in parent_keys:
178
parent_node = nodes[parent_key]
180
parent_node = _KnownGraphNode(parent_key, None)
181
# Ghosts and roots have gdfo 1
183
nodes[parent_key] = parent_node
184
if parent_gdfo < parent_node.gdfo:
185
parent_gdfo = parent_node.gdfo
186
parent_node.child_keys.append(key)
187
node.gdfo = parent_gdfo + 1
188
# Now fill the gdfo to all children
189
# Note that this loop is slightly inefficient, in that we may visit the
190
# same child (and its decendents) more than once, however, it is
191
# 'efficient' in that we only walk to nodes that would be updated,
192
# rather than all nodes
193
# We use a deque rather than a simple list stack, to go for BFD rather
194
# than DFD. So that if a longer path is possible, we walk it before we
195
# get to the final child
196
pending = collections.deque([node])
198
node = pending.popleft()
199
next_gdfo = node.gdfo + 1
200
for child_key in node.child_keys:
201
child = nodes[child_key]
202
if child.gdfo < next_gdfo:
203
# This child is being updated, we need to check its
205
child.gdfo = next_gdfo
206
pending.append(child)
208
def heads(self, keys):
209
"""Return the heads from amongst keys.
211
This is done by searching the ancestries of each key. Any key that is
212
reachable from another key is not returned; all the others are.
214
This operation scales with the relative depth between any two keys. It
215
uses gdfo to avoid walking all ancestry.
217
:param keys: An iterable of keys.
218
:return: A set of the heads. Note that as a set there is no ordering
219
information. Callers will need to filter their input to create
220
order if they need it.
222
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
223
if revision.NULL_REVISION in candidate_nodes:
224
# NULL_REVISION is only a head if it is the only entry
225
candidate_nodes.pop(revision.NULL_REVISION)
226
if not candidate_nodes:
227
return frozenset([revision.NULL_REVISION])
228
if len(candidate_nodes) < 2:
229
# No or only one candidate
230
return frozenset(candidate_nodes)
231
heads_key = frozenset(candidate_nodes)
232
# Do we have a cached result ?
234
heads = self._known_heads[heads_key]
238
# Let's compute the heads
242
for node in viewvalues(candidate_nodes):
244
pending.extend(node.parent_keys)
245
if min_gdfo is None or node.gdfo < min_gdfo:
249
node_key = pending.pop()
251
# node already appears in some ancestry
254
node = nodes[node_key]
255
if node.gdfo <= min_gdfo:
258
pending.extend(node.parent_keys)
259
heads = heads_key.difference(seen)
261
self._known_heads[heads_key] = heads
265
"""Return the nodes in topological order.
267
All parents must occur before all children.
269
for node in viewvalues(self._nodes):
270
if node.gdfo is None:
271
raise errors.GraphCycleError(self._nodes)
272
pending = self._find_tails()
273
pending_pop = pending.pop
274
pending_append = pending.append
277
topo_order_append = topo_order.append
279
num_seen_parents = dict.fromkeys(self._nodes, 0)
282
if node.parent_keys is not None:
283
# We don't include ghost parents
284
topo_order_append(node.key)
285
for child_key in node.child_keys:
286
child_node = self._nodes[child_key]
287
seen_parents = num_seen_parents[child_key] + 1
288
if seen_parents == len(child_node.parent_keys):
289
# All parents have been processed, enqueue this child
290
pending_append(child_node)
291
# This has been queued up, stop tracking it
292
del num_seen_parents[child_key]
294
num_seen_parents[child_key] = seen_parents
295
# We started from the parents, so we don't need to do anymore work
299
"""Return a reverse topological ordering which is 'stable'.
301
There are a few constraints:
302
1) Reverse topological (all children before all parents)
304
3) 'stable' sorting, so that we get the same result, independent of
305
machine, or extra data.
306
To do this, we use the same basic algorithm as topo_sort, but when we
307
aren't sure what node to access next, we sort them lexicographically.
309
tips = self._find_tips()
310
# Split the tips based on prefix
313
if node.key.__class__ is str or len(node.key) == 1:
317
prefix_tips.setdefault(prefix, []).append(node)
319
num_seen_children = dict.fromkeys(self._nodes, 0)
322
for prefix in sorted(prefix_tips):
323
pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
327
if node.parent_keys is None:
328
# Ghost node, skip it
330
result.append(node.key)
331
for parent_key in sorted(node.parent_keys, reverse=True):
332
parent_node = self._nodes[parent_key]
333
seen_children = num_seen_children[parent_key] + 1
334
if seen_children == len(parent_node.child_keys):
335
# All children have been processed, enqueue this parent
336
pending.append(parent_node)
337
# This has been queued up, stop tracking it
338
del num_seen_children[parent_key]
340
num_seen_children[parent_key] = seen_children
343
def merge_sort(self, tip_key):
344
"""Compute the merge sorted graph output."""
345
from breezy import tsort
346
as_parent_map = dict((node.key, node.parent_keys)
347
for node in viewvalues(self._nodes)
348
if node.parent_keys is not None)
349
# We intentionally always generate revnos and never force the
351
# Strip the sequence_number that merge_sort generates
352
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
353
for _, key, merge_depth, revno, end_of_merge
354
in tsort.merge_sort(as_parent_map, tip_key,
355
mainline_revisions=None,
356
generate_revno=True)]
358
def get_parent_keys(self, key):
359
"""Get the parents for a key
361
Returns a list containg the parents keys. If the key is a ghost,
362
None is returned. A KeyError will be raised if the key is not in
365
:param keys: Key to check (eg revision_id)
366
:return: A list of parents
368
return self._nodes[key].parent_keys
370
def get_child_keys(self, key):
371
"""Get the children for a key
373
Returns a list containg the children keys. A KeyError will be raised
374
if the key is not in the graph.
376
:param keys: Key to check (eg revision_id)
377
:return: A list of children
379
return self._nodes[key].child_keys