27
27
class _KnownGraphNode(object):
28
28
"""Represents a single object in the known graph."""
30
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
30
__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',
31
'dominator_distance', 'gdfo', 'ancestor_of')
32
33
def __init__(self, key, parent_keys):
34
35
self.parent_keys = parent_keys
35
36
self.child_keys = []
37
# oldest ancestor, such that no parents between here and there have >1
39
self.linear_dominator = None
40
self.dominator_distance = 0
36
41
# Greatest distance from origin
43
# This will become a tuple of known heads that have this node as an
45
self.ancestor_of = None
39
47
def __repr__(self):
40
return '%s(%s gdfo:%s par:%s child:%s)' % (
48
return '%s(%s gdfo:%s par:%s child:%s dom:%s %s)' % (
41
49
self.__class__.__name__, self.key, self.gdfo,
42
self.parent_keys, self.child_keys)
45
class _MergeSortNode(object):
46
"""Information about a specific node in the merge graph."""
48
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
50
def __init__(self, key, merge_depth, revno, end_of_merge):
52
self.merge_depth = merge_depth
54
self.end_of_merge = end_of_merge
50
self.parent_keys, self.child_keys,
51
self.linear_dominator, self.dominator_distance)
57
54
class KnownGraph(object):
94
91
nodes[parent_key] = parent_node
95
92
parent_node.child_keys.append(key)
97
def _find_tails(self):
98
return [node for node in self._nodes.itervalues()
99
if not node.parent_keys]
101
def _find_tips(self):
102
return [node for node in self._nodes.itervalues()
103
if not node.child_keys]
94
def _find_linear_dominators(self):
95
"""For each node in the set, find any linear dominators.
97
For any given node, the 'linear dominator' is an ancestor, such that
98
all parents between this node and that one have a single parent, and a
99
single child. So if A->B->C->D then B,C,D all have a linear dominator
100
of A. Because there are no interesting siblings, we can quickly skip to
101
the nearest dominator when doing comparisons.
103
def check_node(node):
104
if node.parent_keys is None or len(node.parent_keys) != 1:
105
# This node is either a ghost, a tail, or has multiple parents
106
# It its own dominator
107
node.linear_dominator = node.key
108
node.dominator_distance = 0
110
parent_node = self._nodes[node.parent_keys[0]]
111
if len(parent_node.child_keys) > 1:
112
# The parent has multiple children, so *this* node is the
114
node.linear_dominator = node.key
115
node.dominator_distance = 0
117
# The parent is already filled in, so add and continue
118
if parent_node.linear_dominator is not None:
119
node.linear_dominator = parent_node.linear_dominator
120
node.dominator_distance = parent_node.dominator_distance + 1
122
# We don't know this node, or its parent node, so start walking to
126
for node in self._nodes.itervalues():
127
# The parent is not filled in, so walk until we get somewhere
128
if node.linear_dominator is not None: #already done
130
next_node = check_node(node)
131
if next_node is None:
132
# Nothing more needs to be done
135
while next_node is not None:
138
next_node = check_node(node)
139
# The stack now contains the linear chain, and 'node' should have
141
assert node.linear_dominator is not None
142
dominator = node.linear_dominator
144
next_node = stack.pop()
145
next_node.linear_dominator = dominator
146
next_node.dominator_distance = node.dominator_distance + 1
105
149
def _find_gdfo(self):
151
return [node for node in self._nodes.itervalues()
152
if not node.parent_keys]
155
heappush = heapq.heappush
156
heappop = heapq.heappop
106
157
nodes = self._nodes
107
known_parent_gdfos = {}
110
for node in self._find_tails():
116
for child_key in node.child_keys:
117
child = nodes[child_key]
118
if child_key in known_parent_gdfos:
119
known_gdfo = known_parent_gdfos[child_key] + 1
124
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
125
child.gdfo = node.gdfo + 1
126
if known_gdfo == len(child.parent_keys):
127
# We are the last parent updating that node, we can
128
# continue from there
129
pending.append(child)
131
del known_parent_gdfos[child_key]
133
# Update known_parent_gdfos for a key we couldn't process
134
known_parent_gdfos[child_key] = known_gdfo
136
def add_node(self, key, parent_keys):
137
"""Add a new node to the graph.
139
If this fills in a ghost, then the gdfos of all children will be
142
:param key: The node being added. If this is a duplicate, this is a
144
:param parent_keys: The parents of the given node.
145
:return: None (should we return if this was a ghost, etc?)
150
if node.parent_keys is None:
151
node.parent_keys = parent_keys
152
# A ghost is being added, we can no-longer trust the heads
154
self._known_heads.clear()
156
# Make sure we compare a list to a list, as tuple != list.
157
parent_keys = list(parent_keys)
158
existing_parent_keys = list(node.parent_keys)
159
if parent_keys == existing_parent_keys:
160
return # Identical content
162
raise ValueError('Parent key mismatch, existing node %s'
163
' has parents of %s not %s'
164
% (key, existing_parent_keys, parent_keys))
166
node = _KnownGraphNode(key, parent_keys)
169
for parent_key in parent_keys:
171
parent_node = nodes[parent_key]
173
parent_node = _KnownGraphNode(parent_key, None)
174
# Ghosts and roots have gdfo 1
176
nodes[parent_key] = parent_node
177
if parent_gdfo < parent_node.gdfo:
178
parent_gdfo = parent_node.gdfo
179
parent_node.child_keys.append(key)
180
node.gdfo = parent_gdfo + 1
181
# Now fill the gdfo to all children
182
# Note that this loop is slightly inefficient, in that we may visit the
183
# same child (and its decendents) more than once, however, it is
184
# 'efficient' in that we only walk to nodes that would be updated,
185
# rather than all nodes
186
# We use a deque rather than a simple list stack, to go for BFD rather
187
# than DFD. So that if a longer path is possible, we walk it before we
188
# get to the final child
189
pending = deque([node])
191
node = pending.popleft()
192
next_gdfo = node.gdfo + 1
193
for child_key in node.child_keys:
194
child = nodes[child_key]
195
if child.gdfo < next_gdfo:
196
# This child is being updated, we need to check its
198
child.gdfo = next_gdfo
199
pending.append(child)
160
heappush(todo, (1, node))
162
max_gdfo = len(self._nodes) + 1
164
gdfo, next = heappop(todo)
166
if next.gdfo is not None and gdfo < next.gdfo:
167
# This node was reached from a longer path, we assume it was
168
# enqued correctly with the longer gdfo, so don't continue
170
assert gdfo < next.gdfo
173
assert next_gdfo <= max_gdfo
174
for child_key in next.child_keys:
175
child_node = nodes[child_key]
176
if child_node.gdfo is None or child_node.gdfo < next_gdfo:
177
# Only enque children when all of their parents have been
179
for parent_key in child_node.parent_keys:
180
# We know that 'this' parent is counted
181
if parent_key != next.key:
182
parent_node = nodes[parent_key]
183
if parent_node.gdfo is None:
186
child_node.gdfo = next_gdfo
187
heappush(todo, (next_gdfo, child_node))
201
189
def heads(self, keys):
202
190
"""Return the heads from amongst keys.
217
206
# NULL_REVISION is only a head if it is the only entry
218
207
candidate_nodes.pop(revision.NULL_REVISION)
219
208
if not candidate_nodes:
220
return frozenset([revision.NULL_REVISION])
209
return set([revision.NULL_REVISION])
221
210
if len(candidate_nodes) < 2:
222
# No or only one candidate
223
211
return frozenset(candidate_nodes)
224
212
heads_key = frozenset(candidate_nodes)
225
# Do we have a cached result ?
213
if heads_key != frozenset(keys):
214
note('%s != %s', heads_key, frozenset(keys))
227
216
heads = self._known_heads[heads_key]
231
# Let's compute the heads
235
for node in candidate_nodes.values():
237
pending.extend(node.parent_keys)
238
if min_gdfo is None or node.gdfo < min_gdfo:
242
node_key = pending.pop()
244
# node already appears in some ancestry
247
node = nodes[node_key]
248
if node.gdfo <= min_gdfo:
251
pending.extend(node.parent_keys)
252
heads = heads_key.difference(seen)
219
pass # compute it ourselves
220
# We could do a check here to see if all nodes have the same
221
# 'linear_dominator'. However, in testing, this only was encountered 1
222
# during 'bzr annotate' so it is likely to not be particularly helpful
224
# Check the linear dominators of these keys, to see if we already
225
# know the heads answer
226
dom_key = frozenset([node.linear_dominator
227
for node in candidate_nodes.itervalues()])
228
if dom_key in self._known_heads:
229
dom_to_node = dict([(node.linear_dominator, key)
230
for key, node in candidate_nodes.iteritems()])
231
# map back into the original keys
232
heads = self._known_heads[dom_key]
233
heads = frozenset([dom_to_node[key] for key in heads])
235
heads = self._heads_from_candidate_nodes(candidate_nodes)
253
236
if self.do_cache:
254
237
self._known_heads[heads_key] = heads
238
# Cache the dominator heads
239
if dom_key is not None:
240
dom_heads = frozenset([candidate_nodes[key].linear_dominator
242
self._known_heads[dom_key] = dom_heads
258
"""Return the nodes in topological order.
260
All parents must occur before all children.
262
for node in self._nodes.itervalues():
263
if node.gdfo is None:
264
raise errors.GraphCycleError(self._nodes)
265
pending = self._find_tails()
266
pending_pop = pending.pop
267
pending_append = pending.append
270
topo_order_append = topo_order.append
272
num_seen_parents = dict.fromkeys(self._nodes, 0)
275
if node.parent_keys is not None:
276
# We don't include ghost parents
277
topo_order_append(node.key)
278
for child_key in node.child_keys:
279
child_node = self._nodes[child_key]
280
seen_parents = num_seen_parents[child_key] + 1
281
if seen_parents == len(child_node.parent_keys):
282
# All parents have been processed, enqueue this child
283
pending_append(child_node)
284
# This has been queued up, stop tracking it
285
del num_seen_parents[child_key]
287
num_seen_parents[child_key] = seen_parents
288
# We started from the parents, so we don't need to do anymore work
292
"""Return a reverse topological ordering which is 'stable'.
294
There are a few constraints:
295
1) Reverse topological (all children before all parents)
297
3) 'stable' sorting, so that we get the same result, independent of
298
machine, or extra data.
299
To do this, we use the same basic algorithm as topo_sort, but when we
300
aren't sure what node to access next, we sort them lexicographically.
302
tips = self._find_tips()
303
# Split the tips based on prefix
306
if node.key.__class__ is str or len(node.key) == 1:
245
def _heads_from_candidate_nodes(self, candidate_nodes):
248
to_cleanup_append = to_cleanup.append
249
for node in candidate_nodes.itervalues():
250
assert node.ancestor_of is None
251
node.ancestor_of = (node.key,)
252
queue.append((-node.gdfo, node))
253
to_cleanup_append(node)
255
# These are nodes that we determined are 'common' that we are no longer
257
# Now we walk nodes until all nodes that are being walked are 'common'
258
num_candidates = len(candidate_nodes)
260
heappop = heapq.heappop
261
heappush = heapq.heappush
262
while queue and len(candidate_nodes) > 1:
263
_, node = heappop(queue)
264
# assert node.ancestor_of is not None
265
next_ancestor_of = node.ancestor_of
266
if len(next_ancestor_of) == num_candidates:
267
# This node is now considered 'common'
268
# Make sure all parent nodes are marked as such
269
for parent_key in node.parent_keys:
270
parent_node = nodes[parent_key]
271
if parent_node.ancestor_of is not None:
272
parent_node.ancestor_of = next_ancestor_of
273
if node.linear_dominator != node.key:
274
parent_node = nodes[node.linear_dominator]
275
if parent_node.ancestor_of is not None:
276
parent_node.ancestor_of = next_ancestor_of
278
if node.parent_keys is None:
281
# Now project the current nodes ancestor list to the parent nodes,
282
# and queue them up to be walked
283
# Note: using linear_dominator speeds things up quite a bit
284
# enough that we actually start to be slightly faster
285
# than the default heads() implementation
286
if node.linear_dominator != node.key:
287
# We are at the tip of a long linear region
288
# We know that there is nothing between here and the tail
289
# that is interesting, so skip to the end
290
parent_keys = [node.linear_dominator]
310
prefix_tips.setdefault(prefix, []).append(node)
312
num_seen_children = dict.fromkeys(self._nodes, 0)
315
for prefix in sorted(prefix_tips):
316
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
320
if node.parent_keys is None:
321
# Ghost node, skip it
323
result.append(node.key)
324
for parent_key in sorted(node.parent_keys, reverse=True):
325
parent_node = self._nodes[parent_key]
326
seen_children = num_seen_children[parent_key] + 1
327
if seen_children == len(parent_node.child_keys):
328
# All children have been processed, enqueue this parent
329
pending.append(parent_node)
330
# This has been queued up, stop tracking it
331
del num_seen_children[parent_key]
333
num_seen_children[parent_key] = seen_children
336
def merge_sort(self, tip_key):
337
"""Compute the merge sorted graph output."""
338
from bzrlib import tsort
339
as_parent_map = dict((node.key, node.parent_keys)
340
for node in self._nodes.itervalues()
341
if node.parent_keys is not None)
342
# We intentionally always generate revnos and never force the
344
# Strip the sequence_number that merge_sort generates
345
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
346
for _, key, merge_depth, revno, end_of_merge
347
in tsort.merge_sort(as_parent_map, tip_key,
348
mainline_revisions=None,
349
generate_revno=True)]
351
def get_parent_keys(self, key):
352
"""Get the parents for a key
354
Returns a list containg the parents keys. If the key is a ghost,
355
None is returned. A KeyError will be raised if the key is not in
358
:param keys: Key to check (eg revision_id)
359
:return: A list of parents
361
return self._nodes[key].parent_keys
363
def get_child_keys(self, key):
364
"""Get the children for a key
366
Returns a list containg the children keys. A KeyError will be raised
367
if the key is not in the graph.
369
:param keys: Key to check (eg revision_id)
370
:return: A list of children
372
return self._nodes[key].child_keys
292
parent_keys = node.parent_keys
293
for parent_key in parent_keys:
294
if parent_key in candidate_nodes:
295
candidate_nodes.pop(parent_key)
296
if len(candidate_nodes) <= 1:
298
parent_node = nodes[parent_key]
299
ancestor_of = parent_node.ancestor_of
300
if ancestor_of is None:
301
# This node hasn't been walked yet
302
parent_node.ancestor_of = next_ancestor_of
304
heappush(queue, (-parent_node.gdfo, parent_node))
305
to_cleanup_append(parent_node)
306
elif ancestor_of != next_ancestor_of:
307
# Combine to get the full set of parents
308
all_ancestors = set(ancestor_of)
309
all_ancestors.update(next_ancestor_of)
310
parent_node.ancestor_of = tuple(sorted(all_ancestors))
312
for node in to_cleanup:
313
node.ancestor_of = None
315
return frozenset(candidate_nodes)