1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
from __future__ import absolute_import
23
from collections.abc import deque
24
except ImportError: # python < 3.7
25
from collections import deque
36
class _KnownGraphNode(object):
37
"""Represents a single object in the known graph."""
39
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
41
def __init__(self, key, parent_keys):
43
self.parent_keys = parent_keys
45
# Greatest distance from origin
49
return '%s(%s gdfo:%s par:%s child:%s)' % (
50
self.__class__.__name__, self.key, self.gdfo,
51
self.parent_keys, self.child_keys)
54
class _MergeSortNode(object):
55
"""Information about a specific node in the merge graph."""
57
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
59
def __init__(self, key, merge_depth, revno, end_of_merge):
61
self.merge_depth = merge_depth
63
self.end_of_merge = end_of_merge
66
class KnownGraph(object):
67
"""This is a class which assumes we already know the full graph."""
69
def __init__(self, parent_map, do_cache=True):
70
"""Create a new KnownGraph instance.
72
:param parent_map: A dictionary mapping key => parent_keys
75
# Maps {frozenset(revision_id, revision_id): heads}
76
self._known_heads = {}
77
self.do_cache = do_cache
78
self._initialize_nodes(parent_map)
81
def _initialize_nodes(self, parent_map):
82
"""Populate self._nodes.
84
After this has finished:
85
- self._nodes will have an entry for every entry in parent_map.
86
- ghosts will have a parent_keys = None,
87
- all nodes found will also have .child_keys populated with all known
91
for key, parent_keys in viewitems(parent_map):
94
node.parent_keys = parent_keys
96
node = _KnownGraphNode(key, parent_keys)
98
for parent_key in parent_keys:
100
parent_node = nodes[parent_key]
102
parent_node = _KnownGraphNode(parent_key, None)
103
nodes[parent_key] = parent_node
104
parent_node.child_keys.append(key)
106
def _find_tails(self):
107
return [node for node in viewvalues(self._nodes)
108
if not node.parent_keys]
110
def _find_tips(self):
111
return [node for node in viewvalues(self._nodes)
112
if not node.child_keys]
114
def _find_gdfo(self):
116
known_parent_gdfos = {}
119
for node in self._find_tails():
125
for child_key in node.child_keys:
126
child = nodes[child_key]
127
if child_key in known_parent_gdfos:
128
known_gdfo = known_parent_gdfos[child_key] + 1
133
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
134
child.gdfo = node.gdfo + 1
135
if known_gdfo == len(child.parent_keys):
136
# We are the last parent updating that node, we can
137
# continue from there
138
pending.append(child)
140
del known_parent_gdfos[child_key]
142
# Update known_parent_gdfos for a key we couldn't process
143
known_parent_gdfos[child_key] = known_gdfo
145
def add_node(self, key, parent_keys):
146
"""Add a new node to the graph.
148
If this fills in a ghost, then the gdfos of all children will be
151
:param key: The node being added. If this is a duplicate, this is a
153
:param parent_keys: The parents of the given node.
154
:return: None (should we return if this was a ghost, etc?)
159
if node.parent_keys is None:
160
node.parent_keys = parent_keys
161
# A ghost is being added, we can no-longer trust the heads
163
self._known_heads.clear()
165
# Make sure we compare a list to a list, as tuple != list.
166
parent_keys = list(parent_keys)
167
existing_parent_keys = list(node.parent_keys)
168
if parent_keys == existing_parent_keys:
169
return # Identical content
172
'Parent key mismatch, existing node %s'
173
' has parents of %s not %s'
174
% (key, existing_parent_keys, parent_keys))
176
node = _KnownGraphNode(key, parent_keys)
179
for parent_key in parent_keys:
181
parent_node = nodes[parent_key]
183
parent_node = _KnownGraphNode(parent_key, None)
184
# Ghosts and roots have gdfo 1
186
nodes[parent_key] = parent_node
187
if parent_gdfo < parent_node.gdfo:
188
parent_gdfo = parent_node.gdfo
189
parent_node.child_keys.append(key)
190
node.gdfo = parent_gdfo + 1
191
# Now fill the gdfo to all children
192
# Note that this loop is slightly inefficient, in that we may visit the
193
# same child (and its decendents) more than once, however, it is
194
# 'efficient' in that we only walk to nodes that would be updated,
195
# rather than all nodes
196
# We use a deque rather than a simple list stack, to go for BFD rather
197
# than DFD. So that if a longer path is possible, we walk it before we
198
# get to the final child
199
pending = deque([node])
201
node = pending.popleft()
202
next_gdfo = node.gdfo + 1
203
for child_key in node.child_keys:
204
child = nodes[child_key]
205
if child.gdfo < next_gdfo:
206
# This child is being updated, we need to check its
208
child.gdfo = next_gdfo
209
pending.append(child)
211
def heads(self, keys):
212
"""Return the heads from amongst keys.
214
This is done by searching the ancestries of each key. Any key that is
215
reachable from another key is not returned; all the others are.
217
This operation scales with the relative depth between any two keys. It
218
uses gdfo to avoid walking all ancestry.
220
:param keys: An iterable of keys.
221
:return: A set of the heads. Note that as a set there is no ordering
222
information. Callers will need to filter their input to create
223
order if they need it.
225
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
226
if revision.NULL_REVISION in candidate_nodes:
227
# NULL_REVISION is only a head if it is the only entry
228
candidate_nodes.pop(revision.NULL_REVISION)
229
if not candidate_nodes:
230
return frozenset([revision.NULL_REVISION])
231
if len(candidate_nodes) < 2:
232
# No or only one candidate
233
return frozenset(candidate_nodes)
234
heads_key = frozenset(candidate_nodes)
235
# Do we have a cached result ?
237
heads = self._known_heads[heads_key]
241
# Let's compute the heads
245
for node in viewvalues(candidate_nodes):
247
pending.extend(node.parent_keys)
248
if min_gdfo is None or node.gdfo < min_gdfo:
252
node_key = pending.pop()
254
# node already appears in some ancestry
257
node = nodes[node_key]
258
if node.gdfo <= min_gdfo:
261
pending.extend(node.parent_keys)
262
heads = heads_key.difference(seen)
264
self._known_heads[heads_key] = heads
268
"""Return the nodes in topological order.
270
All parents must occur before all children.
272
for node in viewvalues(self._nodes):
273
if node.gdfo is None:
274
raise errors.GraphCycleError(self._nodes)
275
pending = self._find_tails()
276
pending_pop = pending.pop
277
pending_append = pending.append
280
topo_order_append = topo_order.append
282
num_seen_parents = dict.fromkeys(self._nodes, 0)
285
if node.parent_keys is not None:
286
# We don't include ghost parents
287
topo_order_append(node.key)
288
for child_key in node.child_keys:
289
child_node = self._nodes[child_key]
290
seen_parents = num_seen_parents[child_key] + 1
291
if seen_parents == len(child_node.parent_keys):
292
# All parents have been processed, enqueue this child
293
pending_append(child_node)
294
# This has been queued up, stop tracking it
295
del num_seen_parents[child_key]
297
num_seen_parents[child_key] = seen_parents
298
# We started from the parents, so we don't need to do anymore work
302
"""Return a reverse topological ordering which is 'stable'.
304
There are a few constraints:
305
1) Reverse topological (all children before all parents)
307
3) 'stable' sorting, so that we get the same result, independent of
308
machine, or extra data.
309
To do this, we use the same basic algorithm as topo_sort, but when we
310
aren't sure what node to access next, we sort them lexicographically.
312
tips = self._find_tips()
313
# Split the tips based on prefix
316
if node.key.__class__ is str or len(node.key) == 1:
320
prefix_tips.setdefault(prefix, []).append(node)
322
num_seen_children = dict.fromkeys(self._nodes, 0)
325
for prefix in sorted(prefix_tips):
326
pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
330
if node.parent_keys is None:
331
# Ghost node, skip it
333
result.append(node.key)
334
for parent_key in sorted(node.parent_keys, reverse=True):
335
parent_node = self._nodes[parent_key]
336
seen_children = num_seen_children[parent_key] + 1
337
if seen_children == len(parent_node.child_keys):
338
# All children have been processed, enqueue this parent
339
pending.append(parent_node)
340
# This has been queued up, stop tracking it
341
del num_seen_children[parent_key]
343
num_seen_children[parent_key] = seen_children
346
def merge_sort(self, tip_key):
347
"""Compute the merge sorted graph output."""
348
from breezy import tsort
349
as_parent_map = dict((node.key, node.parent_keys)
350
for node in viewvalues(self._nodes)
351
if node.parent_keys is not None)
352
# We intentionally always generate revnos and never force the
354
# Strip the sequence_number that merge_sort generates
355
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
356
for _, key, merge_depth, revno, end_of_merge
357
in tsort.merge_sort(as_parent_map, tip_key,
358
mainline_revisions=None,
359
generate_revno=True)]
361
def get_parent_keys(self, key):
362
"""Get the parents for a key
364
Returns a list containg the parents keys. If the key is a ghost,
365
None is returned. A KeyError will be raised if the key is not in
368
:param keys: Key to check (eg revision_id)
369
:return: A list of parents
371
return self._nodes[key].parent_keys
373
def get_child_keys(self, key):
374
"""Get the children for a key
376
Returns a list containg the children keys. A KeyError will be raised
377
if the key is not in the graph.
379
:param keys: Key to check (eg revision_id)
380
:return: A list of children
382
return self._nodes[key].child_keys