# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

from breezy import (
    debug,
    errors,
    osutils,
    revision,
    trace,
    )

STEP_UNIQUE_SEARCHER_EVERY = 5

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']


class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    # Note: DictParentsProvider does not implement get_cached_parent_map
    #       Arguably, the data is clearly cached in memory. However, this class
    #       is mostly used for testing, and it keeps the tests clean to not
    #       change it.

    def get_parent_map(self, keys):
        """See StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return dict([(k, ancestry[k]) for k in keys if k in ancestry])
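
# Example usage (an illustrative sketch, not part of the original module):
# keys absent from the supplied dict are simply left out of the result.
#
#   ancestry = {b'rev1': (), b'rev2': (b'rev1',)}
#   provider = DictParentsProvider(ancestry)
#   provider.get_parent_map([b'rev2', b'ghost'])
#   # => {b'rev2': (b'rev1',)}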


class StackedParentsProvider(object):
    """A parents provider which stacks (or unions) multiple providers.

    The providers are queried in the order of the provided parent_providers.
    """

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._parent_providers)

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        # This adds getattr() overhead to each get_parent_map call. However,
        # this is StackedParentsProvider, which means we're dealing with I/O
        # (either local indexes, or remote RPCs), so CPU overhead should be
        # minimal.
        for parents_provider in self._parent_providers:
            get_cached = getattr(parents_provider, 'get_cached_parent_map',
                                 None)
            if get_cached is None:
                continue
            new_found = get_cached(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        if not remaining:
            return found
        for parents_provider in self._parent_providers:
            try:
                new_found = parents_provider.get_parent_map(remaining)
            except errors.UnsupportedOperation:
                continue
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found
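
# Example usage (an illustrative sketch, not part of the original module):
# the first provider that knows a key supplies its parents; cached lookups
# are consulted across all providers before any direct get_parent_map call.
#
#   local = DictParentsProvider({b'A': ()})
#   remote = DictParentsProvider({b'B': (b'A',)})
#   stacked = StackedParentsProvider([local, remote])
#   stacked.get_parent_map([b'A', b'B'])
#   # => {b'A': (), b'B': (b'A',)}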


class CachingParentsProvider(object):
    """A parents provider which will cache the revision => parents as a dict.

    This is useful for providers which have an expensive look up.

    Either a ParentsProvider or a get_parent_map-like callback may be
    supplied.  If it provides extra un-asked-for parents, they will be cached,
    but filtered out of get_parent_map.

    The cache is enabled by default, but may be disabled and re-enabled.
    """

    def __init__(self, parent_provider=None, get_parent_map=None):
        """Constructor.

        :param parent_provider: The ParentProvider to use. It or
            get_parent_map must be supplied.
        :param get_parent_map: The get_parent_map callback to use. It or
            parent_provider must be supplied.
        """
        self._real_provider = parent_provider
        if get_parent_map is None:
            self._get_parent_map = self._real_provider.get_parent_map
        else:
            self._get_parent_map = get_parent_map
        self._cache = None
        self.enable_cache(True)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def enable_cache(self, cache_misses=True):
        """Enable cache."""
        if self._cache is not None:
            raise AssertionError('Cache enabled when already enabled.')
        self._cache = {}
        self._cache_misses = cache_misses
        self.missing_keys = set()

    def disable_cache(self):
        """Disable and clear the cache."""
        self._cache = None
        self._cache_misses = None
        self.missing_keys = set()

    def get_cached_map(self):
        """Return any cached get_parent_map values."""
        if self._cache is None:
            return None
        return dict(self._cache)

    def get_cached_parent_map(self, keys):
        """Return items from the cache.

        This returns the same info as get_parent_map, but explicitly does not
        invoke the supplied ParentsProvider to search for uncached values.
        """
        cache = self._cache
        if cache is None:
            return {}
        return dict([(key, cache[key]) for key in keys if key in cache])

    def get_parent_map(self, keys):
        """See StackedParentsProvider.get_parent_map."""
        cache = self._cache
        if cache is None:
            cache = self._get_parent_map(keys)
        else:
            needed_revisions = set(key for key in keys if key not in cache)
            # Do not ask for negatively cached keys
            needed_revisions.difference_update(self.missing_keys)
            if needed_revisions:
                parent_map = self._get_parent_map(needed_revisions)
                cache.update(parent_map)
                if self._cache_misses:
                    for key in needed_revisions:
                        if key not in parent_map:
                            self.note_missing_key(key)
        result = {}
        for key in keys:
            value = cache.get(key)
            if value is not None:
                result[key] = value
        return result

    def note_missing_key(self, key):
        """Note that key is a missing key."""
        if self._cache_misses:
            self.missing_keys.add(key)
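
# Example usage (an illustrative sketch, not part of the original module);
# the hypothetical expensive_lookup stands in for any costly backend.
#
#   def expensive_lookup(keys):
#       ...  # e.g. index reads or a remote RPC, returning {key: parents}
#
#   caching = CachingParentsProvider(get_parent_map=expensive_lookup)
#   caching.get_parent_map([b'tip'])  # hits expensive_lookup
#   caching.get_parent_map([b'tip'])  # answered from the in-memory cache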


class CallableToParentsProviderAdapter(object):
    """A parents provider that adapts any callable to the parents provider API.

    i.e. it accepts calls to self.get_parent_map and relays them to the
    callable it was constructed with.
    """

    def __init__(self, a_callable):
        self.callable = a_callable

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.callable)

    def get_parent_map(self, keys):
        return self.callable(keys)
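
# Example usage (an illustrative sketch, not part of the original module):
#
#   ancestry = {b'B': (b'A',)}
#   provider = CallableToParentsProviderAdapter(
#       lambda keys: {k: ancestry[k] for k in keys if k in ancestry})
#   provider.get_parent_map([b'B'])  # => {b'B': (b'A',)}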


class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input.

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions.

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:

        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        #   cannot be reached from each other - phase 2.
        return self.heads(border_common)
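
    # Example (an illustrative sketch, not part of the original module),
    # using the test-oriented DictParentsProvider defined above:
    #
    #   ancestry = {b'A': (), b'B': (b'A',), b'C': (b'A',),
    #               b'D': (b'B', b'C')}
    #   graph = Graph(DictParentsProvider(ancestry))
    #   graph.find_lca(b'B', b'C')  # => {b'A'}
    #   graph.find_lca(b'D', b'B')  # => {b'B'}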

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))

    def find_descendants(self, old_key, new_key):
        """Find descendants of old_key that are ancestors of new_key."""
        child_map = self.get_child_map(self._find_descendant_ancestors(
            old_key, new_key))
        graph = Graph(DictParentsProvider(child_map))
        searcher = graph._make_breadth_first_searcher([old_key])
        list(searcher)
        return searcher.seen

    def _find_descendant_ancestors(self, old_key, new_key):
        """Find ancestors of new_key that may be descendants of old_key."""
        stop = self._make_breadth_first_searcher([old_key])
        descendants = self._make_breadth_first_searcher([new_key])
        for revisions in descendants:
            old_stop = stop.seen.intersection(revisions)
            descendants.stop_searching_any(old_stop)
            seen_stop = descendants.find_seen_ancestors(stop.step())
            descendants.stop_searching_any(seen_stop)
        return descendants.seen.difference(stop.seen)

    def get_child_map(self, keys):
        """Get a mapping from parents to children of the specified keys.

        This is simply the inversion of get_parent_map.  Only supplied keys
        will be discovered as children.

        :return: a dict of key:child_list for keys.
        """
        parent_map = self._parents_provider.get_parent_map(keys)
        parent_child = {}
        for child, parents in sorted(parent_map.items()):
            for parent in parents:
                parent_child.setdefault(parent, []).append(child)
        return parent_child

    def find_distance_to_null(self, target_revision_id, known_revision_ids):
        """Find the left-hand distance to the NULL_REVISION.

        (This can also be considered the revno of a branch at
        target_revision_id.)

        :param target_revision_id: A revision_id which we would like to know
            the revno for.
        :param known_revision_ids: [(revision_id, revno)] A list of known
            revno, revision_id tuples. We'll use this to seed the search.
        """
        # Map from revision_ids to a known value for their revno
        known_revnos = dict(known_revision_ids)
        cur_tip = target_revision_id
        num_steps = 0
        NULL_REVISION = revision.NULL_REVISION
        known_revnos[NULL_REVISION] = 0

        searching_known_tips = list(known_revnos)

        unknown_searched = {}

        while cur_tip not in known_revnos:
            unknown_searched[cur_tip] = num_steps
            num_steps += 1
            to_search = {cur_tip}
            to_search.update(searching_known_tips)
            parent_map = self.get_parent_map(to_search)
            parents = parent_map.get(cur_tip, None)
            if not parents:  # An empty list or None is a ghost
                raise errors.GhostRevisionsHaveNoRevno(target_revision_id,
                                                       cur_tip)
            cur_tip = parents[0]
            next_known_tips = []
            for revision_id in searching_known_tips:
                parents = parent_map.get(revision_id, None)
                if not parents:
                    continue
                next = parents[0]
                next_revno = known_revnos[revision_id] - 1
                if next in unknown_searched:
                    # We have enough information to return a value right now
                    return next_revno + unknown_searched[next]
                if next in known_revnos:
                    continue
                known_revnos[next] = next_revno
                next_known_tips.append(next)
            searching_known_tips = next_known_tips
        # We reached a known revision, so just add in how many steps it took to
        # get there.
        return known_revnos[cur_tip] + num_steps
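
    # Example (an illustrative sketch, not part of the original module):
    # revnos are counted along the lefthand parent chain, optionally seeded
    # with an already-known (revision_id, revno) pair.
    #
    #   ancestry = {b'r1': (revision.NULL_REVISION,), b'r2': (b'r1',),
    #               b'r3': (b'r2',)}
    #   graph = Graph(DictParentsProvider(ancestry))
    #   graph.find_distance_to_null(b'r3', [])            # => 3
    #   graph.find_distance_to_null(b'r3', [(b'r2', 2)])  # => 3, found faster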

    def find_lefthand_distances(self, keys):
        """Find the distance to null for all the keys in keys.

        :param keys: keys to lookup.
        :return: A dict key->distance for all of keys.
        """
        # Optimisable by concurrent searching, but a random spread should get
        # some sort of hit rate.
        known_revnos = []
        ghosts = []
        for key in keys:
            try:
                known_revnos.append(
                    (key, self.find_distance_to_null(key, known_revnos)))
            except errors.GhostRevisionsHaveNoRevno:
                ghosts.append(key)
        for key in ghosts:
            known_revnos.append((key, -1))
        return dict(known_revnos)

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            (XXX: Would this API be better if we allowed multiple revisions to
            be searched here?)
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()

        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Aggregate together unique searchers when they are searching the
        #    same tips. When all unique searchers are searching the same node,
        #    stop them and move it to a single 'all_unique_searcher'.
        # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
        #    Most of the time this produces very little important information.
        #    So don't step it as quickly as the other searchers.
        # 8) Search is done when all common searchers have completed.

        unique_searcher, common_searcher = self._find_initial_unique_nodes(
            [unique_revision], common_revisions)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
        if not unique_nodes:
            return unique_nodes

        (all_unique_searcher,
         unique_tip_searchers) = self._make_unique_searchers(
            unique_nodes, unique_searcher, common_searcher)

        self._refine_unique_nodes(unique_searcher, all_unique_searcher,
                                  unique_tip_searchers, common_searcher)
        true_unique_nodes = unique_nodes.difference(common_searcher.seen)
        if 'graph' in debug.debug_flags:
            trace.mutter('Found %d truly unique nodes out of %d',
                         len(true_unique_nodes), len(unique_nodes))
        return true_unique_nodes

    def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
        """Steps 1-3 of find_unique_ancestors.

        Find the maximal set of unique nodes. Some of these might actually
        still be common, but we are sure that there are no other unique nodes.

        :return: (unique_searcher, common_searcher)
        """
        unique_searcher = self._make_breadth_first_searcher(unique_revisions)
        # we know that unique_revisions aren't in common_revisions, so skip
        # past them.
        next(unique_searcher)
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        # As long as we are still finding unique nodes, keep searching
        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                    unique_are_common_nodes)
                # TODO: This is a bit overboard, we only really care about
                #       the ancestors of the tips because the rest we
                #       already know. This is *correct* but causes us to
                #       search too much ancestry.
                ancestors.update(
                    common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        return unique_searcher, common_searcher

    def _make_unique_searchers(self, unique_nodes, unique_searcher,
                               common_searcher):
        """Create a searcher for all the unique search tips (step 4).

        As a side effect, the common_searcher will stop searching any nodes
        that are ancestors of the unique searcher tips.

        :return: (all_unique_searcher, unique_tip_searchers)
        """
        unique_tips = self._remove_simple_descendants(
            unique_nodes, self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_tip_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(
                unique_tips)
        else:
            unique_tip_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                revs_to_search.update(
                    common_searcher.find_seen_ancestors(revs_to_search))
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_tip_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_tip_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                        searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(
            ancestor_all_unique)
        if ancestor_all_unique:
            # We've seen these nodes in all the searchers, so we'll just go to
            # the next
            all_unique_searcher.step()

            # Stop any search tips that are already known as ancestors of the
            # unique nodes
            stopped_common = common_searcher.stop_searching_any(
                common_searcher.find_seen_ancestors(ancestor_all_unique))

            total_stopped = 0
            for searcher in unique_tip_searchers:
                total_stopped += len(searcher.stop_searching_any(
                    searcher.find_seen_ancestors(ancestor_all_unique)))
        if 'graph' in debug.debug_flags:
            trace.mutter('For %d unique nodes, created %d + 1 unique searchers'
                         ' (%d stopped search tips, %d common ancestors'
                         ' (%d stopped common)',
                         len(unique_nodes), len(unique_tip_searchers),
                         total_stopped, len(ancestor_all_unique),
                         len(stopped_common))
        return all_unique_searcher, unique_tip_searchers

    def _step_unique_and_common_searchers(self, common_searcher,
                                          unique_tip_searchers,
                                          unique_searcher):
        """Step all the searchers"""
        newly_seen_common = set(common_searcher.step())
        newly_seen_unique = set()
        for searcher in unique_tip_searchers:
            next = set(searcher.step())
            next.update(unique_searcher.find_seen_ancestors(next))
            next.update(common_searcher.find_seen_ancestors(next))
            for alt_searcher in unique_tip_searchers:
                if alt_searcher is searcher:
                    continue
                next.update(alt_searcher.find_seen_ancestors(next))
            searcher.start_searching(next)
            newly_seen_unique.update(next)
        return newly_seen_common, newly_seen_unique

    def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
                                         all_unique_searcher,
                                         newly_seen_unique, step_all_unique):
        """Find nodes that are common to all unique_tip_searchers.

        If it is time, step the all_unique_searcher, and add its nodes to the
        result.
        """
        common_to_all_unique_nodes = newly_seen_unique.copy()
        for searcher in unique_tip_searchers:
            common_to_all_unique_nodes.intersection_update(searcher.seen)
        common_to_all_unique_nodes.intersection_update(
            all_unique_searcher.seen)
        # Step all-unique less frequently than the other searchers.
        # In the common case, we don't need to spider out far here, so
        # avoid doing extra work.
        if step_all_unique:
            tstart = osutils.perf_counter()
            nodes = all_unique_searcher.step()
            common_to_all_unique_nodes.update(nodes)
            if 'graph' in debug.debug_flags:
                tdelta = osutils.perf_counter() - tstart
                trace.mutter('all_unique_searcher step() took %.3fs '
                             'for %d nodes (%d total), iteration: %s',
                             tdelta, len(nodes), len(all_unique_searcher.seen),
                             all_unique_searcher._iterations)
        return common_to_all_unique_nodes

    def _collapse_unique_searchers(self, unique_tip_searchers,
                                   common_to_all_unique_nodes):
        """Combine searchers that are searching the same tips.

        When two searchers are searching the same tips, we can stop one of the
        searchers. We also know that the maximal set of common ancestors is the
        intersection of the two original searchers.

        :return: A list of searchers that are searching unique nodes.
        """
        # Filter out searchers that don't actually search different
        # nodes. We already have the ancestry intersection for them
        unique_search_tips = {}
        for searcher in unique_tip_searchers:
            stopped = searcher.stop_searching_any(common_to_all_unique_nodes)
            will_search_set = frozenset(searcher._next_query)
            if not will_search_set:
                if 'graph' in debug.debug_flags:
                    trace.mutter('Unique searcher %s was stopped.'
                                 ' (%s iterations) %d nodes stopped',
                                 searcher._label,
                                 searcher._iterations,
                                 len(stopped))
            elif will_search_set not in unique_search_tips:
                # This searcher is searching a unique set of nodes, let it
                unique_search_tips[will_search_set] = [searcher]
            else:
                unique_search_tips[will_search_set].append(searcher)
        # TODO: it might be possible to collapse searchers faster when they
        #       only have *some* search tips in common.
        next_unique_searchers = []
        for searchers in unique_search_tips.values():
            if len(searchers) == 1:
                # Searching unique tips, go for it
                next_unique_searchers.append(searchers[0])
            else:
                # These searchers have started searching the same tips, we
                # don't need them to cover the same ground. The
                # intersection of their ancestry won't change, so create a
                # new searcher, combining their histories.
                next_searcher = searchers[0]
                for searcher in searchers[1:]:
                    next_searcher.seen.intersection_update(searcher.seen)
                if 'graph' in debug.debug_flags:
                    trace.mutter('Combining %d searchers into a single'
                                 ' searcher searching %d nodes with'
                                 ' %d ancestry',
                                 len(searchers),
                                 len(next_searcher._next_query),
                                 len(next_searcher.seen))
                next_unique_searchers.append(next_searcher)
        return next_unique_searchers

    def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
                             unique_tip_searchers, common_searcher):
        """Steps 5-8 of find_unique_ancestors.

        This function returns when common_searcher has stopped searching for
        more nodes.
        """
        # We step the ancestor_all_unique searcher only every
        # STEP_UNIQUE_SEARCHER_EVERY steps.
        step_all_unique_counter = 0
        # While we still have common nodes to search
        while common_searcher._next_query:
            (newly_seen_common,
             newly_seen_unique) = self._step_unique_and_common_searchers(
                common_searcher, unique_tip_searchers, unique_searcher)
            # These nodes are common ancestors of all unique nodes
            common_to_all_unique_nodes = self._find_nodes_common_to_all_unique(
                unique_tip_searchers, all_unique_searcher, newly_seen_unique,
                step_all_unique_counter == 0)
            step_all_unique_counter = ((step_all_unique_counter + 1)
                                       % STEP_UNIQUE_SEARCHER_EVERY)

            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if common_to_all_unique_nodes:
                common_to_all_unique_nodes.update(
                    common_searcher.find_seen_ancestors(
                        common_to_all_unique_nodes))
                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                # This is the sort of thing where we would like to not have it
                # start_searching all of the nodes, but only mark all of them
                # as seen, and have it search only the actual tips. Otherwise
                # it is another get_parent_map() traversal for it to figure out
                # what we already should know.
                all_unique_searcher.start_searching(common_to_all_unique_nodes)
                common_searcher.stop_searching_any(common_to_all_unique_nodes)

            next_unique_searchers = self._collapse_unique_searchers(
                unique_tip_searchers, common_to_all_unique_nodes)
            if len(unique_tip_searchers) != len(next_unique_searchers):
                if 'graph' in debug.debug_flags:
                    trace.mutter('Collapsed %d unique searchers => %d'
                                 ' at %s iterations',
                                 len(unique_tip_searchers),
                                 len(next_unique_searchers),
                                 all_unique_searcher._iterations)
            unique_tip_searchers = next_unique_searchers

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)
            if new_common:
                for searcher in searchers:
                    new_common.update(searcher.find_seen_ancestors(new_common))
                for searcher in searchers:
                    searcher.start_searching(new_common)
                common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                if uncommon_nodes:
                    raise AssertionError("Somehow we ended up converging"
                                         " without actually marking them as"
                                         " common."
                                         "\nStart_nodes: %s"
                                         "\nuncommon_nodes: %s"
                                         % (revisions, uncommon_nodes))
                break
        return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return {revision.NULL_REVISION}
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                         for c in candidate_heads)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.values():
            next(searcher)
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                next(common_walker)
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in list(active_searchers):
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(next(searcher))
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = {ancestor}
                    for searcher in searchers.values():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.values():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors
                        new_common.add(ancestor)
                        for searcher in searchers.values():
                            seen_ancestors = \
                                searcher.find_seen_ancestors([ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads
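
    # Example (an illustrative sketch, not part of the original module):
    # b'D' descends from b'B', so b'B' is eliminated; disconnected siblings
    # are both heads.
    #
    #   ancestry = {b'A': (), b'B': (b'A',), b'C': (b'A',), b'D': (b'B',)}
    #   graph = Graph(DictParentsProvider(ancestry))
    #   graph.heads([b'D', b'B'])  # => {b'D'}
    #   graph.heads([b'B', b'C'])  # => {b'B', b'C'}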

    def find_merge_order(self, tip_revision_id, lca_revision_ids):
        """Find the order that each revision was merged into tip.

        This basically just walks backwards with a stack, and walks left-first
        until it finds a node to stop.
        """
        if len(lca_revision_ids) == 1:
            return list(lca_revision_ids)
        looking_for = set(lca_revision_ids)
        # TODO: Is there a way we could do this "faster" by batching up the
        #       get_parent_map requests?
        # TODO: Should we also be culling the ancestry search right away? We
        #       could add looking_for to the "stop" list, and walk their
        #       ancestry in batched mode. The flip side is it might mean we
        #       walk a lot of "stop" nodes, rather than only the minimum.
        #       Then again, without it we may trace back into ancestry we
        #       could have stopped early.
        stack = [tip_revision_id]
        stop = set()
        found = []
        while stack and looking_for:
            next = stack.pop()
            stop.add(next)
            if next in looking_for:
                found.append(next)
                looking_for.remove(next)
                if len(looking_for) == 1:
                    found.append(looking_for.pop())
                    break
                continue
            parent_ids = self.get_parent_map([next]).get(next, None)
            if not parent_ids:  # Ghost, nothing to search here
                continue
            for parent_id in reversed(parent_ids):
                # TODO: (performance) We see the parent at this point, but we
                #       wait to mark it until later to make sure we get left
                #       parents before right parents. However, instead of
                #       waiting until we have traversed enough parents, we
                #       could instead note that we've found it, and once all
                #       parents are in the stack, just reverse iterate the
                #       stack for them.
                if parent_id not in stop:
                    # this will need to be searched
                    stack.append(parent_id)
                stop.add(parent_id)
        return found

    def find_lefthand_merger(self, merged_key, tip_key):
        """Find the first lefthand ancestor of tip_key that merged merged_key.

        We do this by first finding the descendants of merged_key, then
        walking through the lefthand ancestry of tip_key until we find a key
        that doesn't descend from merged_key.  Its child is the key that
        merged merged_key.

        :return: The first lefthand ancestor of tip_key to merge merged_key.
            merged_key if it is a lefthand ancestor of tip_key.
            None if no ancestor of tip_key merged merged_key.
        """
        descendants = self.find_descendants(merged_key, tip_key)
        candidate_iterator = self.iter_lefthand_ancestry(tip_key)
        last_candidate = None
        for candidate in candidate_iterator:
            if candidate not in descendants:
                return last_candidate
            last_candidate = candidate

    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                result = lca.pop()
                if count_steps:
                    return result, steps
                return result
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca
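
    # Example (an illustrative sketch, not part of the original module):
    # a criss-cross merge yields two LCAs, so find_unique_lca recurses once
    # more to find their unique LCA.
    #
    #   ancestry = {b'A': (), b'B': (b'A',), b'C': (b'A',),
    #               b'D': (b'B', b'C'), b'E': (b'C', b'B')}
    #   graph = Graph(DictParentsProvider(ancestry))
    #   graph.find_lca(b'D', b'E')         # => {b'B', b'C'}
    #   graph.find_unique_lca(b'D', b'E')  # => b'A'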

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.items():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending

    def iter_lefthand_ancestry(self, start_key, stop_keys=None):
        if stop_keys is None:
            stop_keys = ()
        next_key = start_key

        def get_parents(key):
            try:
                return self._parents_provider.get_parent_map([key])[key]
            except KeyError:
                raise errors.RevisionNotPresent(next_key, self)
        while True:
            if next_key in stop_keys:
                return
            parents = get_parents(next_key)
            yield next_key
            if len(parents) == 0:
                return
            else:
                next_key = parents[0]

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        from breezy import tsort
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()

    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return {candidate_descendant} == self.heads(
            [candidate_ancestor, candidate_descendant])
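
    # Example (an illustrative sketch, not part of the original module):
    #
    #   ancestry = {b'A': (), b'B': (b'A',), b'C': (b'A',)}
    #   graph = Graph(DictParentsProvider(ancestry))
    #   graph.is_ancestor(b'A', b'B')  # => True
    #   graph.is_ancestor(b'B', b'C')  # => False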

    def is_between(self, revid, lower_bound_revid, upper_bound_revid):
        """Determine whether a revision is between two others.

        Returns true if and only if:
        lower_bound_revid <= revid <= upper_bound_revid
        """
        return ((upper_bound_revid is None or
                 self.is_ancestor(revid, upper_bound_revid)) and
                (lower_bound_revid is None or
                 self.is_ancestor(lower_bound_revid, revid)))

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        :return: None
        """
        # Basic algorithm...
        #   A) The passed in searchers should all be on the same tips, thus
        #      they should be considered the "common" searchers.
        #   B) We find the difference between the searchers, these are the
        #      "unique" nodes for each side.
        #   C) We do a quick culling so that we only start searching from the
        #      more interesting unique nodes. (A unique ancestor is more
        #      interesting than any of its children.)
        #   D) We start searching for ancestors common to all unique nodes.
        #   E) We have the common searchers stop searching any ancestors of
        #      nodes found by (D)
        #   F) When there are no more common search tips, we stop

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        if len(searchers) != 2:
            raise NotImplementedError(
                "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique:  # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
                                                 self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search:  # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers

        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                    searcher.seen)

        trace.mutter('Started %d unique searchers for %d unique revisions',
                     simple_unique, total_unique)

        while True:  # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                    newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                # that we have seen
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    will_search_set = frozenset(searcher._next_query)
                    if will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes,
                        # let it continue searching
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """remove revisions which are children of other ones in the set

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :return: A set of revision_ids with the children removed
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       better.

        # This is the same as the following loop. I don't know that it is any
        # faster.
        # simple_ancestors.difference_update(
        #     r for r, p_ids in parent_map.iteritems()
        #     if p_ids is not None and revisions.intersection(p_ids))
        # return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        # descendants = {}
        # for revision_id, parent_ids in parent_map.iteritems():
        #     for p_id in parent_ids:
        #         descendants.setdefault(p_id, []).append(revision_id)
        # for revision in revisions.intersection(descendants):
        #     simple_ancestors.difference_update(descendants[revision])
        # return simple_ancestors
        for revision, parent_ids in parent_map.items():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # remove it
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value is
        a set which can be mutated, and ordering of the input is not preserved
        in the output.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return set(heads)


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return value
        is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)
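
# Example usage (an illustrative sketch, not part of the original module):
# repeated heads() queries with the same key set hit the memo dict.
#
#   ancestry = {b'A': (), b'B': (b'A',), b'C': (b'B',)}
#   cache = FrozenHeadsCache(Graph(DictParentsProvider(ancestry)))
#   cache.heads([b'B', b'C'])  # computed once => frozenset({b'C'})
#   cache.heads([b'C', b'B'])  # same frozenset key, served from the cache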


class _BreadthFirstSearcher(object):
    """Parallel search breadth-first the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = "searching"
        else:
            prefix = "starting"
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_state(self):
        """Get the current state of this searcher.

        :return: Tuple with started keys, excludes and included keys
        """
        if self._returning == 'next':
            # We have to know the current nodes children to be able to list the
            # exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next, parents = self._do_query(self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next)
            next_query = next.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return self._started_keys, excludes, included_keys

    def step(self):
        try:
            return next(self)
        except StopIteration:
            return ()

    def __next__(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    next = __next__

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
            set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions:
        # we want to filter them out.
        seen = self.seen
        seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.items():
            if parents is None:
                continue
            new_found_parents = [p for p in parents if p not in seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen.

        This function generally makes the assumption that querying for the
        parents of a node that has already been queried is reasonably cheap.
        (eg, not a round trip to a remote host).
        """
        # TODO: Often we might ask one searcher for its seen ancestors, and
        #       then ask another searcher the same question. This can result in
        #       searching the same revisions repeatedly if the two searchers
        #       have a lot of overlap.
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost
            for parent_ids in parent_map.values():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(
                all_parents).difference(seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """
        Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.

        It is okay to call stop_searching_any() for revisions which were seen
        in previous iterations. It is the callers responsibility to call
        find_seen_ancestors() to make sure that current search tips that are
        ancestors of those revisions are also stopped.  All explicitly stopped
        revisions will be excluded from the search result's get_keys(), though.
        """
        # TODO: does this help performance?
        # if not revisions:
        #     return set()
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' always references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop
            for parents in self._current_parents.values():
                for parent_id in parents:
                    try:
                        stop_rev_references[parent_id] -= 1
                    except KeyError:
                        pass
            stop_parents = set()
            for rev_id, refs in stop_rev_references.items():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        self._stopped_keys.update(revisions)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


def invert_parent_map(parent_map):
    """Given a map from child => parents, create a map of parent=>children"""
    child_map = {}
    for child, parents in parent_map.items():
        for p in parents:
            # Any given parent is likely to have only a small handful
            # of children, many will have only one. So we avoid mem overhead of
            # a list, in exchange for extra copying of tuples
            if p not in child_map:
                child_map[p] = (child,)
            else:
                child_map[p] = child_map[p] + (child,)
    return child_map
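
# Example usage (an illustrative sketch, not part of the original module):
#
#   invert_parent_map({b'B': (b'A',), b'C': (b'A', b'B')})
#   # => {b'A': (b'B', b'C'), b'B': (b'C',)}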


def collapse_linear_regions(parent_map):
    """Collapse regions of the graph that are 'linear'.

    For example::

      A:[B], B:[C]

    can be collapsed by removing B and getting::

      A:[C]

    :param parent_map: A dictionary mapping children to their parents
    :return: Another dictionary with 'linear' chains collapsed
    """
    # Note: this isn't a strictly minimal collapse. For example:
    #   A
    #  / \
    # B   C
    #  \ /
    #   D
    #   |
    #   E
    # Will not have 'D' removed, even though 'E' could fit. Also:
    #   A
    #   |    A
    #   B => |
    #   |    C
    #   C
    # A and C are both kept because they are edges of the graph. We *could* get
    # rid of A if we wanted.
    #     A
    #    / \
    #   B   C
    #   |   |
    #   D   E
    #    \ /
    #     F
    # Will not have any nodes removed, even though you do have an
    # 'uninteresting' linear D->B and E->C
    children = {}
    for child, parents in parent_map.items():
        children.setdefault(child, [])
        for p in parents:
            children.setdefault(p, []).append(child)

    result = dict(parent_map)
    for node in parent_map:
        parents = result[node]
        if len(parents) == 1:
            parent_children = children[parents[0]]
            if len(parent_children) != 1:
                # This is not the only child
                continue
            node_children = children[node]
            if len(node_children) != 1:
                continue
            child_parents = result.get(node_children[0], None)
            if len(child_parents) != 1:
                # This is not its only parent
                continue
            # The child of this node only points at it, and the parent only has
            # this as a child. remove this node, and join the others together
            result[node_children[0]] = parents
            children[parents[0]] = node_children
            del result[node]
            del children[node]

    return result
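
# Example usage (an illustrative sketch, not part of the original module):
# b'B' sits on a linear run, so it is collapsed away; the end points
# b'A' and b'C' survive.
#
#   collapse_linear_regions({b'A': (), b'B': (b'A',), b'C': (b'B',)})
#   # => {b'A': (), b'C': (b'A',)}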


class GraphThunkIdsToKeys(object):
    """Forwards calls about 'ids' to be about keys internally."""

    def __init__(self, graph):
        self._graph = graph

    def topo_sort(self):
        return [r for (r,) in self._graph.topo_sort()]

    def heads(self, ids):
        """See Graph.heads()"""
        as_keys = [(i,) for i in ids]
        head_keys = self._graph.heads(as_keys)
        return {h[0] for h in head_keys}

    def merge_sort(self, tip_revision):
        nodes = self._graph.merge_sort((tip_revision,))
        for node in nodes:
            node.key = node.key[0]
        return nodes

    def add_node(self, revision, parents):
        self._graph.add_node((revision,), [(p,) for p in parents])


_counters = [0, 0, 0, 0, 0, 0, 0]
try:
    from ._known_graph_pyx import KnownGraph
except ImportError as e:
    osutils.failed_to_load_extension(e)
    from ._known_graph_py import KnownGraph  # noqa: F401