# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import time

from bzrlib import (
    debug,
    errors,
    revision,
    symbol_versioning,
    trace,
    tsort,
    )
from bzrlib.deprecated_graph import (node_distances, select_farthest)

STEP_UNIQUE_SEARCHER_EVERY = 5

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']


class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return dict((k, ancestry[k]) for k in keys if k in ancestry)
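
    # Illustrative sketch (not part of the original module; the revision ids
    # are made up): a DictParentsProvider simply serves a pre-built
    # {key: parents} dict.
    #
    #   provider = DictParentsProvider({'rev1': (), 'rev2': ('rev1',)})
    #   provider.get_parent_map(['rev2', 'ghost'])
    #   => {'rev2': ('rev1',)}    ('ghost' is unknown, so it has no entry)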


class _StackedParentsProvider(object):

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "_StackedParentsProvider(%r)" % self._parent_providers

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        for parents_provider in self._parent_providers:
            new_found = parents_provider.get_parent_map(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found
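
    # Illustrative sketch (assumed revision ids): earlier providers in the
    # stack win, and later providers are only asked about the keys that are
    # still unresolved.
    #
    #   p1 = DictParentsProvider({'rev2': ('rev1',)})
    #   p2 = DictParentsProvider({'rev1': (), 'rev2': ('other',)})
    #   stacked = _StackedParentsProvider([p1, p2])
    #   stacked.get_parent_map(['rev1', 'rev2'])
    #   => {'rev1': (), 'rev2': ('rev1',)}    (p2 is never asked about 'rev2')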


class CachingParentsProvider(object):
    """A parents provider which will cache the revision => parents in a dict.

    This is useful for providers that have an expensive lookup.
    """

    def __init__(self, parent_provider):
        self._real_provider = parent_provider
        # Theoretically we could use an LRUCache here
        self._cache = {}

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        needed = set()
        # If the _real_provider doesn't have a key, we cache a value of None,
        # which we then later use to realize we cannot provide a value for
        # that key.
        parent_map = {}
        cache = self._cache
        for key in keys:
            if key in cache:
                value = cache[key]
                if value is not None:
                    parent_map[key] = value
            else:
                needed.add(key)

        if needed:
            new_parents = self._real_provider.get_parent_map(needed)
            cache.update(new_parents)
            parent_map.update(new_parents)
            needed.difference_update(new_parents)
            cache.update(dict.fromkeys(needed, None))
        return parent_map
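
    # Illustrative sketch (assumed revision ids): the first call queries the
    # real provider; repeat lookups, including misses, are then answered from
    # self._cache.
    #
    #   caching = CachingParentsProvider(DictParentsProvider({'rev1': ()}))
    #   caching.get_parent_map(['rev1', 'ghost'])
    #   => {'rev1': ()}
    #   caching._cache
    #   => {'rev1': (), 'ghost': None}    ('ghost' is remembered as missing)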


class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            _StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:
        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        #   cannot be reached from each other - phase 2.
        return self.heads(border_common)
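
    # Illustrative sketch (hypothetical revision ids): a simple graph where
    # 'left' and 'right' both branch from 'base' and meet again in 'merged'.
    #
    #   parents = {'base': (revision.NULL_REVISION,),
    #              'left': ('base',), 'right': ('base',),
    #              'merged': ('left', 'right')}
    #   graph = Graph(DictParentsProvider(parents))
    #   graph.find_lca('left', 'right')
    #   => set(['base'])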

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))
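
    # Illustrative sketch, reusing the graph from the find_lca example above:
    #
    #   graph.find_difference('left', 'right')
    #   => (set(['left']), set(['right']))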

    def find_revno(self, target_revision_id, known_revision_ids):
        """Determine the revno for target_revision_id.

        :param target_revision_id: A revision_id which we would like to know
            the revno for.
        :param known_revision_ids: [(revision_id, revno)] A list of known
            revno, revision_id tuples. We'll use this to seed the search.
        """
        # Map from revision_ids to a known value for their revno
        known_revnos = dict(known_revision_ids)
        cur_tip = target_revision_id
        num_steps = 0
        NULL_REVISION = revision.NULL_REVISION

        while cur_tip != NULL_REVISION:
            if cur_tip in known_revnos:
                # We found a revision with a known revno; the target is
                # num_steps of left-hand history beyond it.
                return known_revnos[cur_tip] + num_steps
            parent_map = self.get_parent_map([cur_tip])
            parents = parent_map.get(cur_tip, None)
            if not parents: # An empty list, or None is still a ghost
                raise errors.GhostRevisionsHaveNoRevno(target_revision_id,
                                                       cur_tip)
            cur_tip = parents[0]
            if cur_tip == NULL_REVISION:
                break
            num_steps += 1
        # We reached NULL_REVISION, so the total distance is num_steps + 1
        return num_steps + 1

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            XXX: Would this API be better if we allowed multiple revisions on
                 the unique side to be searched here?
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()

        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Aggregate together unique searchers when they are searching the
        #    same tips. When all unique searchers are searching the same node,
        #    stop them and move to a single 'all_unique_searcher'.
        # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
        #    Most of the time this produces very little important information.
        #    So don't step it as quickly as the other searchers.
        # 8) Search is done when all common searchers have completed.

        unique_searcher, common_searcher = self._find_initial_unique_nodes(
            [unique_revision], common_revisions)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
        if not unique_nodes:
            return unique_nodes

        (all_unique_searcher,
         unique_tip_searchers) = self._make_unique_searchers(unique_nodes,
                                    unique_searcher, common_searcher)

        self._refine_unique_nodes(unique_searcher, all_unique_searcher,
                                  unique_tip_searchers, common_searcher)
        true_unique_nodes = unique_nodes.difference(common_searcher.seen)
        if 'graph' in debug.debug_flags:
            trace.mutter('Found %d truly unique nodes out of %d',
                         len(true_unique_nodes), len(unique_nodes))
        return true_unique_nodes
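
    # Illustrative sketch, reusing the graph from the find_lca example above:
    # everything reachable from 'left' except the ancestry of 'right'.
    #
    #   graph.find_unique_ancestors('left', ['right'])
    #   => set(['left'])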

    def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
        """Steps 1-3 of find_unique_ancestors.

        Find the maximal set of unique nodes. Some of these might actually
        still be common, but we are sure that there are no other unique nodes.

        :return: (unique_searcher, common_searcher)
        """

        unique_searcher = self._make_breadth_first_searcher(unique_revisions)
        # we know that unique_revisions aren't in common_revisions, so skip
        # past them.
        unique_searcher.next()
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        # As long as we are still finding unique nodes, keep searching
        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                                unique_are_common_nodes)
                # TODO: This is a bit overboard, we only really care about
                #       the ancestors of the tips because the rest we
                #       already know. This is *correct* but causes us to
                #       search too much ancestry.
                ancestors.update(common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        return unique_searcher, common_searcher

    def _make_unique_searchers(self, unique_nodes, unique_searcher,
                               common_searcher):
        """Create a searcher for all the unique search tips (step 4).

        As a side effect, the common_searcher will stop searching any nodes
        that are ancestors of the unique searcher tips.

        :return: (all_unique_searcher, unique_tip_searchers)
        """
        unique_tips = self._remove_simple_descendants(unique_nodes,
                        self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_tip_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
        else:
            unique_tip_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                revs_to_search.update(
                    common_searcher.find_seen_ancestors(revs_to_search))
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_tip_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_tip_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                                                searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(
                                ancestor_all_unique)
        stopped_common = set()
        total_stopped = 0
        if ancestor_all_unique:
            # We've seen these nodes in all the searchers, so we'll just go to
            # the next
            all_unique_searcher.step()

            # Stop any search tips that are already known as ancestors of the
            # unique nodes
            stopped_common = common_searcher.stop_searching_any(
                common_searcher.find_seen_ancestors(ancestor_all_unique))

            for searcher in unique_tip_searchers:
                total_stopped += len(searcher.stop_searching_any(
                    searcher.find_seen_ancestors(ancestor_all_unique)))
        if 'graph' in debug.debug_flags:
            trace.mutter('For %d unique nodes, created %d + 1 unique searchers'
                         ' (%d stopped search tips, %d common ancestors,'
                         ' %d stopped common)',
                         len(unique_nodes), len(unique_tip_searchers),
                         total_stopped, len(ancestor_all_unique),
                         len(stopped_common))
        return all_unique_searcher, unique_tip_searchers

    def _step_unique_and_common_searchers(self, common_searcher,
                                          unique_tip_searchers,
                                          unique_searcher):
        """Step all the searchers"""
        newly_seen_common = set(common_searcher.step())
        newly_seen_unique = set()
        for searcher in unique_tip_searchers:
            next = set(searcher.step())
            next.update(unique_searcher.find_seen_ancestors(next))
            next.update(common_searcher.find_seen_ancestors(next))
            for alt_searcher in unique_tip_searchers:
                if alt_searcher is searcher:
                    continue
                next.update(alt_searcher.find_seen_ancestors(next))
            searcher.start_searching(next)
            newly_seen_unique.update(next)
        return newly_seen_common, newly_seen_unique

    def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
                                         all_unique_searcher,
                                         newly_seen_unique, step_all_unique):
        """Find nodes that are common to all unique_tip_searchers.

        If it is time, step the all_unique_searcher, and add its nodes to the
        result.
        """
        common_to_all_unique_nodes = newly_seen_unique.copy()
        for searcher in unique_tip_searchers:
            common_to_all_unique_nodes.intersection_update(searcher.seen)
        common_to_all_unique_nodes.intersection_update(
                    all_unique_searcher.seen)
        # Step all-unique less frequently than the other searchers.
        # In the common case, we don't need to spider out far here, so
        # avoid doing extra work.
        if step_all_unique:
            tstart = time.clock()
            nodes = all_unique_searcher.step()
            common_to_all_unique_nodes.update(nodes)
            if 'graph' in debug.debug_flags:
                tdelta = time.clock() - tstart
                trace.mutter('all_unique_searcher step() took %.3fs'
                             ' for %d nodes (%d total), iteration: %s',
                             tdelta, len(nodes), len(all_unique_searcher.seen),
                             all_unique_searcher._iterations)
        return common_to_all_unique_nodes

    def _collapse_unique_searchers(self, unique_tip_searchers,
                                   common_to_all_unique_nodes):
        """Combine searchers that are searching the same tips.

        When two searchers are searching the same tips, we can stop one of the
        searchers. We also know that the maximal set of common ancestors is the
        intersection of the two original searchers.

        :return: A list of searchers that are searching unique nodes.
        """
        # Filter out searchers that don't actually search different
        # nodes. We already have the ancestry intersection for them
        unique_search_tips = {}
        for searcher in unique_tip_searchers:
            stopped = searcher.stop_searching_any(common_to_all_unique_nodes)
            will_search_set = frozenset(searcher._next_query)
            if not will_search_set:
                if 'graph' in debug.debug_flags:
                    trace.mutter('Unique searcher %s was stopped.'
                                 ' (%s iterations) %d nodes stopped',
                                 searcher._label,
                                 searcher._iterations,
                                 len(stopped))
            elif will_search_set not in unique_search_tips:
                # This searcher is searching a unique set of nodes, let it
                unique_search_tips[will_search_set] = [searcher]
            else:
                unique_search_tips[will_search_set].append(searcher)
        # TODO: it might be possible to collapse searchers faster when they
        #       only have *some* search tips in common.
        next_unique_searchers = []
        for searchers in unique_search_tips.itervalues():
            if len(searchers) == 1:
                # Searching unique tips, go for it
                next_unique_searchers.append(searchers[0])
            else:
                # These searchers have started searching the same tips, we
                # don't need them to cover the same ground. The
                # intersection of their ancestry won't change, so create a
                # new searcher, combining their histories.
                next_searcher = searchers[0]
                for searcher in searchers[1:]:
                    next_searcher.seen.intersection_update(searcher.seen)
                if 'graph' in debug.debug_flags:
                    trace.mutter('Combining %d searchers into a single'
                                 ' searcher searching %d nodes with'
                                 ' %d ancestry',
                                 len(searchers),
                                 len(next_searcher._next_query),
                                 len(next_searcher.seen))
                next_unique_searchers.append(next_searcher)
        return next_unique_searchers

    def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
                             unique_tip_searchers, common_searcher):
        """Steps 5-8 of find_unique_ancestors.

        This function returns when common_searcher has stopped searching for
        more nodes.
        """
        # We step the ancestor_all_unique searcher only every
        # STEP_UNIQUE_SEARCHER_EVERY steps.
        step_all_unique_counter = 0
        # While we still have common nodes to search
        while common_searcher._next_query:
            (newly_seen_common,
             newly_seen_unique) = self._step_unique_and_common_searchers(
                common_searcher, unique_tip_searchers, unique_searcher)
            # These nodes are common ancestors of all unique nodes
            common_to_all_unique_nodes = self._find_nodes_common_to_all_unique(
                unique_tip_searchers, all_unique_searcher, newly_seen_unique,
                step_all_unique_counter == 0)
            step_all_unique_counter = ((step_all_unique_counter + 1)
                                       % STEP_UNIQUE_SEARCHER_EVERY)

            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if common_to_all_unique_nodes:
                common_to_all_unique_nodes.update(
                    common_searcher.find_seen_ancestors(
                        common_to_all_unique_nodes))
                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                # This is the sort of thing where we would like to not have it
                # start_searching all of the nodes, but only mark all of them
                # as seen, and have it search only the actual tips. Otherwise
                # it is another get_parent_map() traversal for it to figure out
                # what we already should know.
                all_unique_searcher.start_searching(common_to_all_unique_nodes)
                common_searcher.stop_searching_any(common_to_all_unique_nodes)

            next_unique_searchers = self._collapse_unique_searchers(
                unique_tip_searchers, common_to_all_unique_nodes)
            if len(unique_tip_searchers) != len(next_unique_searchers):
                if 'graph' in debug.debug_flags:
                    trace.mutter('Collapsed %d unique searchers => %d'
                                 ' at %s iterations',
                                 len(unique_tip_searchers),
                                 len(next_unique_searchers),
                                 all_unique_searcher._iterations)
            unique_tip_searchers = next_unique_searchers

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revisions):
        """Find revision ids of the parents of a list of revisions

        A list is returned of the same length as the input.  Each entry
        is a list of parent ids for the corresponding input revision.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        If the revision is not present (i.e. a ghost), None is used in place
        of the list of parents.

        Deprecated in bzr 1.2 - please see get_parent_map.
        """
        parents = self.get_parent_map(revisions)
        return [parents.get(r, None) for r in revisions]

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        active_searchers = searchers[:]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)
            if new_common:
                for searcher in searchers:
                    new_common.update(searcher.find_seen_ancestors(new_common))
                for searcher in searchers:
                    searcher.start_searching(new_common)
                common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                if uncommon_nodes:
                    raise AssertionError("Somehow we ended up converging"
                                         " without actually marking them as"
                                         " in common."
                                         "\nStart_nodes: %s"
                                         "\nuncommon_nodes: %s"
                                         % (revisions, uncommon_nodes))
                break
        return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return set([revision.NULL_REVISION])
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                          for c in candidate_heads)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.itervalues():
            searcher.next()
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                common_walker.next()
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in active_searchers.keys():
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(searcher.next())
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = set([ancestor])
                    for searcher in searchers.itervalues():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.itervalues():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors
                        new_common.add(ancestor)
                        for searcher in searchers.itervalues():
                            seen_ancestors = \
                                searcher.find_seen_ancestors([ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads

    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                unique_lca = lca.pop()
                if count_steps:
                    return unique_lca, steps
                return unique_lca
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.iteritems():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()
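
    # Illustrative sketch, reusing the graph from the find_lca example above:
    # parents always sort before their children.
    #
    #   list(graph.iter_topo_order(['merged', 'left', 'base']))
    #   => ['base', 'left', 'merged']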

    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return set([candidate_descendant]) == self.heads(
            [candidate_ancestor, candidate_descendant])
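
    # Illustrative sketch, reusing the graph from the find_lca example above:
    #
    #   graph.heads(['left', 'right'])
    #   => set(['left', 'right'])    (neither is reachable from the other)
    #   graph.is_ancestor('base', 'merged')
    #   => True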

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        :return: None
        """
        # Basic algorithm...
        #   A) The passed in searchers should all be on the same tips, thus
        #      they should be considered the "common" searchers.
        #   B) We find the difference between the searchers, these are the
        #      "unique" nodes for each side.
        #   C) We do a quick culling so that we only start searching from the
        #      more interesting unique nodes. (A unique ancestor is more
        #      interesting than any of its children.)
        #   D) We start searching for ancestors common to all unique nodes.
        #   E) We have the common searchers stop searching any ancestors of
        #      nodes found by (D).
        #   F) When there are no more common search tips, we stop.

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        if len(searchers) != 2:
            raise NotImplementedError(
                "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique: # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
                    self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search: # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers

        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                                            searcher.seen)

        trace.mutter('Started %s unique searchers for %s unique revisions',
                     simple_unique, total_unique)

        while True: # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                                            newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                # that we have seen
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    will_search_set = frozenset(searcher._next_query)
                    if will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes, let it
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """remove revisions which are children of other ones in the set

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :return: A set of revision_ids with the children removed
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       *faster* to compute.
        # This is the same as the following loop. I don't know that it is any
        # faster.
        ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
        ##     if p_ids is not None and revisions.intersection(p_ids))
        ## return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        ## descendants = {}
        ## for revision_id, parent_ids in parent_map.iteritems():
        ##   for p_id in parent_ids:
        ##       descendants.setdefault(p_id, []).append(revision_id)
        ## for revision in revisions.intersection(descendants):
        ##   simple_ancestors.difference_update(descendants[revision])
        ## return simple_ancestors
        for revision, parent_ids in parent_map.iteritems():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # remove it
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors
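
    # Illustrative sketch (hypothetical revision ids): 'left' is a direct
    # child of 'base', so it is culled from the set.
    #
    #   g = Graph(DictParentsProvider({'base': (), 'left': ('base',)}))
    #   g._remove_simple_descendants(set(['base', 'left']),
    #                                g.get_parent_map(['base', 'left']))
    #   => set(['base'])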


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value is
        a set which can be mutated, and ordering of the input is not preserved
        in the output.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return set(heads)


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return value
        is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)
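
    # Illustrative sketch, reusing the graph from the find_lca example
    # earlier in this module: cache() primes an answer, so the underlying
    # graph is never consulted for that key set (keys are order-insensitive).
    #
    #   cache = FrozenHeadsCache(graph)
    #   cache.cache(['left', 'right'], ['left', 'right'])
    #   cache.heads(['right', 'left'])
    #   => frozenset(['left', 'right'])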


class _BreadthFirstSearcher(object):
    """Parallel search breadth-first the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = "searching"
        else:
            prefix = "starting"
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_result(self):
        """Get a SearchResult for the current state of this searcher.

        :return: A SearchResult for this search so far. The SearchResult is
            static - the search can be advanced and the search result will not
            be invalidated or altered.
        """
        if self._returning == 'next':
            # We have to know the current nodes children to be able to list the
            # exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next, parents = self._do_query(self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next)
            next_query = next.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return SearchResult(self._started_keys, excludes, len(included_keys),
            included_keys)

    def step(self):
        try:
            return self.next()
        except StopIteration:
            return ()

    def next(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
           set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions:
        # we want to filter them out.
        self.seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.iteritems():
            new_found_parents = [p for p in parents if p not in self.seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen.

        This function generally makes the assumption that querying for the
        parents of a node that has already been queried is reasonably cheap.
        (eg, not a round trip to a remote host).
        """
        # TODO: Often we might ask one searcher for its seen ancestors, and
        #       then ask another searcher the same question. This can result in
        #       searching the same revisions repeatedly if the two searchers
        #       have a lot of overlap.
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost
            for parent_ids in parent_map.itervalues():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """
        Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.  In this case, the call is a no-op.
        """
        # TODO: does this help performance?
        # if not revisions:
        #     return set()
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' always references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop
            for parents in self._current_parents.itervalues():
                for parent_id in parents:
                    try:
                        stop_rev_references[parent_id] -= 1
                    except KeyError:
                        pass
            stop_parents = set()
            for rev_id, refs in stop_rev_references.iteritems():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


class SearchResult(object):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = (start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start at and a list of keys to stop at. In order to give
        reproducible results when ghosts are encountered by a search they are
        automatically added to the exclude list (or else ghost filling may
        alter the results).

        :return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
            recreate the results of this search, create a breadth first
            searcher on the same graph starting at start_keys. Then call next()
            (or next_with_ghosts()) repeatedly, and on every result, call
            stop_searching_any on any keys from the exclude_keys set. The
            revision_count value acts as a trivial cross-check - the found
            revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys
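
# Illustrative sketch (hypothetical graph): exhausting a searcher and
# replaying its recipe later.
#
#   searcher = graph._make_breadth_first_searcher(['merged'])
#   for revs in searcher:
#       pass                          # run the search to completion
#   start, exclude, count = searcher.get_result().get_recipe()
#   # A new searcher seeded with 'start' and stopped at 'exclude' will find
#   # exactly 'count' revisions, unless ghosts have been filled in since.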