1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
21
from collections.abc import deque
22
except ImportError: # python < 3.7
23
from collections import deque
30
class _KnownGraphNode(object):
31
"""Represents a single object in the known graph."""
33
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
35
def __init__(self, key, parent_keys):
37
self.parent_keys = parent_keys
39
# Greatest distance from origin
43
return '%s(%s gdfo:%s par:%s child:%s)' % (
44
self.__class__.__name__, self.key, self.gdfo,
45
self.parent_keys, self.child_keys)
48
class _MergeSortNode(object):
49
"""Information about a specific node in the merge graph."""
51
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
53
def __init__(self, key, merge_depth, revno, end_of_merge):
55
self.merge_depth = merge_depth
57
self.end_of_merge = end_of_merge
60
class KnownGraph(object):
61
"""This is a class which assumes we already know the full graph."""
63
def __init__(self, parent_map, do_cache=True):
64
"""Create a new KnownGraph instance.
66
:param parent_map: A dictionary mapping key => parent_keys
69
# Maps {frozenset(revision_id, revision_id): heads}
70
self._known_heads = {}
71
self.do_cache = do_cache
72
self._initialize_nodes(parent_map)
75
def _initialize_nodes(self, parent_map):
76
"""Populate self._nodes.
78
After this has finished:
79
- self._nodes will have an entry for every entry in parent_map.
80
- ghosts will have a parent_keys = None,
81
- all nodes found will also have .child_keys populated with all known
85
for key, parent_keys in parent_map.items():
88
node.parent_keys = parent_keys
90
node = _KnownGraphNode(key, parent_keys)
92
for parent_key in parent_keys:
94
parent_node = nodes[parent_key]
96
parent_node = _KnownGraphNode(parent_key, None)
97
nodes[parent_key] = parent_node
98
parent_node.child_keys.append(key)
100
def _find_tails(self):
101
return [node for node in self._nodes.values()
102
if not node.parent_keys]
104
def _find_tips(self):
105
return [node for node in self._nodes.values()
106
if not node.child_keys]
108
def _find_gdfo(self):
110
known_parent_gdfos = {}
113
for node in self._find_tails():
119
for child_key in node.child_keys:
120
child = nodes[child_key]
121
if child_key in known_parent_gdfos:
122
known_gdfo = known_parent_gdfos[child_key] + 1
127
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
128
child.gdfo = node.gdfo + 1
129
if known_gdfo == len(child.parent_keys):
130
# We are the last parent updating that node, we can
131
# continue from there
132
pending.append(child)
134
del known_parent_gdfos[child_key]
136
# Update known_parent_gdfos for a key we couldn't process
137
known_parent_gdfos[child_key] = known_gdfo
139
def add_node(self, key, parent_keys):
140
"""Add a new node to the graph.
142
If this fills in a ghost, then the gdfos of all children will be
145
:param key: The node being added. If this is a duplicate, this is a
147
:param parent_keys: The parents of the given node.
148
:return: None (should we return if this was a ghost, etc?)
153
if node.parent_keys is None:
154
node.parent_keys = parent_keys
155
# A ghost is being added, we can no-longer trust the heads
157
self._known_heads.clear()
159
# Make sure we compare a list to a list, as tuple != list.
160
parent_keys = list(parent_keys)
161
existing_parent_keys = list(node.parent_keys)
162
if parent_keys == existing_parent_keys:
163
return # Identical content
166
'Parent key mismatch, existing node %s'
167
' has parents of %s not %s'
168
% (key, existing_parent_keys, parent_keys))
170
node = _KnownGraphNode(key, parent_keys)
173
for parent_key in parent_keys:
175
parent_node = nodes[parent_key]
177
parent_node = _KnownGraphNode(parent_key, None)
178
# Ghosts and roots have gdfo 1
180
nodes[parent_key] = parent_node
181
if parent_gdfo < parent_node.gdfo:
182
parent_gdfo = parent_node.gdfo
183
parent_node.child_keys.append(key)
184
node.gdfo = parent_gdfo + 1
185
# Now fill the gdfo to all children
186
# Note that this loop is slightly inefficient, in that we may visit the
187
# same child (and its decendents) more than once, however, it is
188
# 'efficient' in that we only walk to nodes that would be updated,
189
# rather than all nodes
190
# We use a deque rather than a simple list stack, to go for BFD rather
191
# than DFD. So that if a longer path is possible, we walk it before we
192
# get to the final child
193
pending = deque([node])
195
node = pending.popleft()
196
next_gdfo = node.gdfo + 1
197
for child_key in node.child_keys:
198
child = nodes[child_key]
199
if child.gdfo < next_gdfo:
200
# This child is being updated, we need to check its
202
child.gdfo = next_gdfo
203
pending.append(child)
205
def heads(self, keys):
206
"""Return the heads from amongst keys.
208
This is done by searching the ancestries of each key. Any key that is
209
reachable from another key is not returned; all the others are.
211
This operation scales with the relative depth between any two keys. It
212
uses gdfo to avoid walking all ancestry.
214
:param keys: An iterable of keys.
215
:return: A set of the heads. Note that as a set there is no ordering
216
information. Callers will need to filter their input to create
217
order if they need it.
219
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
220
if revision.NULL_REVISION in candidate_nodes:
221
# NULL_REVISION is only a head if it is the only entry
222
candidate_nodes.pop(revision.NULL_REVISION)
223
if not candidate_nodes:
224
return frozenset([revision.NULL_REVISION])
225
if len(candidate_nodes) < 2:
226
# No or only one candidate
227
return frozenset(candidate_nodes)
228
heads_key = frozenset(candidate_nodes)
229
# Do we have a cached result ?
231
heads = self._known_heads[heads_key]
235
# Let's compute the heads
239
for node in candidate_nodes.values():
241
pending.extend(node.parent_keys)
242
if min_gdfo is None or node.gdfo < min_gdfo:
246
node_key = pending.pop()
248
# node already appears in some ancestry
251
node = nodes[node_key]
252
if node.gdfo <= min_gdfo:
255
pending.extend(node.parent_keys)
256
heads = heads_key.difference(seen)
258
self._known_heads[heads_key] = heads
262
"""Return the nodes in topological order.
264
All parents must occur before all children.
266
for node in self._nodes.values():
267
if node.gdfo is None:
268
raise errors.GraphCycleError(self._nodes)
269
pending = self._find_tails()
270
pending_pop = pending.pop
271
pending_append = pending.append
274
topo_order_append = topo_order.append
276
num_seen_parents = dict.fromkeys(self._nodes, 0)
279
if node.parent_keys is not None:
280
# We don't include ghost parents
281
topo_order_append(node.key)
282
for child_key in node.child_keys:
283
child_node = self._nodes[child_key]
284
seen_parents = num_seen_parents[child_key] + 1
285
if seen_parents == len(child_node.parent_keys):
286
# All parents have been processed, enqueue this child
287
pending_append(child_node)
288
# This has been queued up, stop tracking it
289
del num_seen_parents[child_key]
291
num_seen_parents[child_key] = seen_parents
292
# We started from the parents, so we don't need to do anymore work
296
"""Return a reverse topological ordering which is 'stable'.
298
There are a few constraints:
299
1) Reverse topological (all children before all parents)
301
3) 'stable' sorting, so that we get the same result, independent of
302
machine, or extra data.
303
To do this, we use the same basic algorithm as topo_sort, but when we
304
aren't sure what node to access next, we sort them lexicographically.
306
tips = self._find_tips()
307
# Split the tips based on prefix
310
if node.key.__class__ is str or len(node.key) == 1:
314
prefix_tips.setdefault(prefix, []).append(node)
316
num_seen_children = dict.fromkeys(self._nodes, 0)
319
for prefix in sorted(prefix_tips):
320
pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
324
if node.parent_keys is None:
325
# Ghost node, skip it
327
result.append(node.key)
328
for parent_key in sorted(node.parent_keys, reverse=True):
329
parent_node = self._nodes[parent_key]
330
seen_children = num_seen_children[parent_key] + 1
331
if seen_children == len(parent_node.child_keys):
332
# All children have been processed, enqueue this parent
333
pending.append(parent_node)
334
# This has been queued up, stop tracking it
335
del num_seen_children[parent_key]
337
num_seen_children[parent_key] = seen_children
340
def merge_sort(self, tip_key):
341
"""Compute the merge sorted graph output."""
342
from breezy import tsort
343
as_parent_map = dict((node.key, node.parent_keys)
344
for node in self._nodes.values()
345
if node.parent_keys is not None)
346
# We intentionally always generate revnos and never force the
348
# Strip the sequence_number that merge_sort generates
349
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
350
for _, key, merge_depth, revno, end_of_merge
351
in tsort.merge_sort(as_parent_map, tip_key,
352
mainline_revisions=None,
353
generate_revno=True)]
355
def get_parent_keys(self, key):
356
"""Get the parents for a key
358
Returns a list containg the parents keys. If the key is a ghost,
359
None is returned. A KeyError will be raised if the key is not in
362
:param keys: Key to check (eg revision_id)
363
:return: A list of parents
365
return self._nodes[key].parent_keys
367
def get_child_keys(self, key):
368
"""Get the children for a key
370
Returns a list containg the children keys. A KeyError will be raised
371
if the key is not in the graph.
373
:param keys: Key to check (eg revision_id)
374
:return: A list of children
376
return self._nodes[key].child_keys