1179
1176
self._heads[frozenset(keys)] = frozenset(heads)
1182
class _KnownGraphNode(object):
1183
"""Represents a single object in the known graph."""
1185
__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',
1186
'dominator_distance', 'gdfo', 'ancestor_of')
1188
def __init__(self, key, parent_keys):
1190
self.parent_keys = parent_keys
1191
self.child_keys = []
1192
self.linear_dominator = None
1193
self.dominator_distance = 0
1194
# Greatest distance from origin
1196
# This will become a tuple of known heads that have this node as an
1198
self.ancestor_of = None
1201
return '%s(%s gdfo:%s par:%s child:%s dom:%s %s)' % (
1202
self.__class__.__name__, self.key, self.gdfo,
1203
self.parent_keys, self.child_keys,
1204
self.linear_dominator, self.dominator_distance)
1207
_counters = [0, 0, 0, 0, 0, 0]
1208
class KnownGraph(object):
1209
"""This is a class which assumes we already know the full graph."""
1211
def __init__(self, parent_map):
1212
"""Create a new KnownGraph instance.
1214
:param parent_map: A dictionary mapping key => parent_keys
1217
self._initialize_nodes(parent_map)
1219
def _initialize_nodes(self, parent_map):
1220
"""Populate self._nodes.
1222
After this has finished, self._nodes will have an entry for every entry
1223
in parent_map. Ghosts will have a parent_keys = None, all nodes found
1224
will also have .child_keys populated with all known child_keys.
1227
for key, parent_keys in parent_map.iteritems():
1230
assert node.parent_keys is None
1231
node.parent_keys = parent_keys
1233
node = _KnownGraphNode(key, parent_keys)
1235
for parent_key in parent_keys:
1237
parent_node = nodes[parent_key]
1239
parent_node = _KnownGraphNode(parent_key, None)
1240
nodes[parent_key] = parent_node
1241
parent_node.child_keys.append(key)
1242
self._find_linear_dominators()
1245
def _find_linear_dominators(self):
1246
"""For each node in the set, find any linear dominators.
1248
For any given node, the 'linear dominator' is an ancestor, such that
1249
all parents between this node and that one have a single parent, and a
1250
single child. So if A->B->C->D then B,C,D all have a linear dominator
1251
of A. Because there are no interesting siblings, we can quickly skip to
1252
the nearest dominator when doing comparisons.
1254
def check_node(node):
1255
if node.parent_keys is None or len(node.parent_keys) != 1:
1256
# This node is either a ghost, a tail, or has multiple parents
1257
# It its own dominator
1258
node.linear_dominator = node.key
1259
node.dominator_distance = 0
1261
parent_node = self._nodes[node.parent_keys[0]]
1262
if len(parent_node.child_keys) > 1:
1263
# The parent has multiple children, so *this* node is the
1265
node.linear_dominator = node.key
1266
node.dominator_distance = 0
1268
# The parent is already filled in, so add and continue
1269
if parent_node.linear_dominator is not None:
1270
node.linear_dominator = parent_node.linear_dominator
1271
node.dominator_distance = parent_node.dominator_distance + 1
1273
# We don't know this node, or its parent node, so start walking to
1277
for node in self._nodes.itervalues():
1278
# The parent is not filled in, so walk until we get somewhere
1279
if node.linear_dominator is not None: #already done
1281
next_node = check_node(node)
1282
if next_node is None:
1283
# Nothing more needs to be done
1286
while next_node is not None:
1289
next_node = check_node(node)
1290
# The stack now contains the linear chain, and 'node' should have
1292
assert node.linear_dominator is not None
1293
dominator = node.linear_dominator
1295
next_node = stack.pop()
1296
next_node.linear_dominator = dominator
1297
next_node.dominator_distance = node.dominator_distance + 1
1300
def _find_gdfo(self):
1301
# TODO: Consider moving the tails search into the first-pass over the
1302
# data, inside _find_linear_dominators
1304
return [node for node in self._nodes.itervalues()
1305
if not node.parent_keys]
1306
tails = find_tails()
1310
heapq.heappush(todo, (1, node))
1313
max_gdfo = len(self._nodes) + 1
1315
gdfo, next = heapq.heappop(todo)
1317
if next.gdfo is not None and gdfo < next.gdfo:
1318
# This node was reached from a longer path, we assume it was
1319
# enqued correctly with the longer gdfo, so don't continue
1321
assert gdfo < next.gdfo
1324
next_gdfo = gdfo + 1
1325
assert next_gdfo <= max_gdfo
1326
for child_key in next.child_keys:
1327
child_node = self._nodes[child_key]
1328
if child_node.gdfo is None or child_node.gdfo < next_gdfo:
1329
# Only enque children when all of their parents have been
1331
for parent_key in child_node.parent_keys:
1332
# We know that 'this' parent is counted
1333
if parent_key != next.key:
1334
parent_node = self._nodes[parent_key]
1335
if parent_node.gdfo is None:
1338
child_node.gdfo = next_gdfo
1339
heapq.heappush(todo, (next_gdfo, child_node))
1341
def heads(self, keys):
1342
"""Return the heads from amongst keys.
1344
This is done by searching the ancestries of each key. Any key that is
1345
reachable from another key is not returned; all the others are.
1347
This operation scales with the relative depth between any two keys. If
1348
any two keys are completely disconnected all ancestry of both sides
1351
:param keys: An iterable of keys.
1352
:return: A set of the heads. Note that as a set there is no ordering
1353
information. Callers will need to filter their input to create
1354
order if they need it.
1356
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
1357
_counters[0] += len(candidate_nodes)
1358
if revision.NULL_REVISION in candidate_nodes:
1359
# NULL_REVISION is only a head if it is the only entry
1360
candidate_nodes.pop(revision.NULL_REVISION)
1361
if not candidate_nodes:
1362
return set([revision.NULL_REVISION])
1363
if len(candidate_nodes) < 2:
1364
return set(candidate_nodes)
1366
# TODO: We could optimize for the len(candidate_nodes) > 2 by checking
1367
# for *any* pair-wise matching, and then eliminating one of the
1368
# nodes trivially. However, the fairly common case is just 2
1369
# keys, so we'll focus on that, first
1370
for node in candidate_nodes.itervalues():
1371
if dominator is None:
1372
dominator = node.linear_dominator
1373
elif dominator != node.linear_dominator:
1376
# All of these nodes have the same linear_dominator, which means
1377
# they are in a line, the head is just the one with the highest
1379
def get_distance(key):
1380
return self._nodes[key].dominator_distance
1381
def get_linear_head():
1382
return max(candidate_nodes, key=get_distance)
1383
return set([get_linear_head()])
1384
return self._heads_from_candidate_nodes(candidate_nodes)
1386
def _heads_from_candidate_nodes(self, candidate_nodes):
1389
to_cleanup_append = to_cleanup.append
1390
for node in candidate_nodes.itervalues():
1391
assert node.ancestor_of is None
1392
node.ancestor_of = (node.key,)
1393
queue.append((-node.gdfo, node))
1394
to_cleanup_append(node)
1395
heapq.heapify(queue)
1396
# These are nodes that we determined are 'common' that we are no longer
1398
# Now we walk nodes until all nodes that are being walked are 'common'
1399
num_candidates = len(candidate_nodes)
1400
counters = _counters
1402
heappop = heapq.heappop
1403
heappush = heapq.heappush
1404
while queue and len(candidate_nodes) > 1:
1406
_, next = heappop(queue)
1407
# assert next.ancestor_of is not None
1408
next_ancestor_of = next.ancestor_of
1409
if len(next_ancestor_of) == num_candidates:
1410
# This node is now considered 'common'
1411
# Make sure all parent nodes are marked as such
1412
for parent_key in node.parent_keys:
1414
parent_node = nodes[parent_key]
1415
if parent_node.ancestor_of is not None:
1416
parent_node.ancestor_of = next_ancestor_of
1417
if node.linear_dominator != node.key:
1419
parent_node = nodes[node.linear_dominator]
1420
if parent_node.ancestor_of is not None:
1421
parent_node.ancestor_of = next_ancestor_of
1423
if next.parent_keys is None:
1426
# Now project the current nodes ancestor list to the parent nodes,
1427
# and queue them up to be walked
1428
# Note: using linear_dominator speeds things up quite a bit
1429
# enough that we actually start to be slightly faster
1430
# than the default heads() implementation
1431
if next.linear_dominator != next.key:
1432
# We are at the tip of a long linear region
1433
# We know that there is nothing between here and the tail
1434
# that is interesting, so skip to the end
1436
parent_keys = [next.linear_dominator]
1438
parent_keys = next.parent_keys
1439
for parent_key in parent_keys:
1441
if parent_key in candidate_nodes:
1442
candidate_nodes.pop(parent_key)
1443
if len(candidate_nodes) <= 1:
1446
parent_node = nodes[parent_key]
1447
ancestor_of = parent_node.ancestor_of
1448
if ancestor_of is None:
1450
# This node hasn't been walked yet
1451
parent_node.ancestor_of = next_ancestor_of
1453
heappush(queue, (-parent_node.gdfo, parent_node))
1454
to_cleanup_append(parent_node)
1455
elif ancestor_of != next_ancestor_of:
1457
# Combine to get the full set of parents
1458
all_ancestors = set(ancestor_of)
1459
all_ancestors.update(next_ancestor_of)
1460
parent_node.ancestor_of = tuple(sorted(all_ancestors))
1462
for node in to_cleanup:
1464
node.ancestor_of = None
1466
return set(candidate_nodes)
1468
def get_parent_map(self, keys):
1469
# Thunk to match the Graph._parents_provider api.
1470
nodes = [self._nodes[key] for key in keys]
1471
return dict((node.key, node.parent_keys)
1472
for node in nodes if node.parent_keys is not None)
1476
1179
class _BreadthFirstSearcher(object):
1477
1180
"""Parallel search breadth-first the ancestry of revisions.