# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from bzrlib import (
    debug,
    errors,
    revision,
    symbol_versioning,
    trace,
    tsort,
    )
from bzrlib.deprecated_graph import (node_distances, select_farthest)

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']


class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return dict((k, ancestry[k]) for k in keys if k in ancestry)
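
# For example (illustrative; 'rev1', 'rev2' and 'ghost' are made-up ids),
# lookups simply filter to the keys present in the supplied ancestry dict,
# so absent (ghost) keys are omitted rather than raising:
#
#   provider = DictParentsProvider({'rev1': (), 'rev2': ('rev1',)})
#   provider.get_parent_map(['rev2', 'ghost'])
#   # => {'rev2': ('rev1',)}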


class _StackedParentsProvider(object):

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "_StackedParentsProvider(%r)" % self._parent_providers

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        for parents_provider in self._parent_providers:
            new_found = parents_provider.get_parent_map(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found
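
# For example (illustrative), providers are consulted in order and each one is
# only asked about the keys that are still unanswered, so earlier providers
# shadow later ones:
#
#   stacked = _StackedParentsProvider([
#       DictParentsProvider({'b': ('a',)}),
#       DictParentsProvider({'a': (), 'b': ('other',)}),
#       ])
#   stacked.get_parent_map(['a', 'b'])
#   # => {'b': ('a',), 'a': ()}    ('b' never reaches the second provider)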


class CachingParentsProvider(object):
    """A parents provider which will cache the revision => parents in a dict.

    This is useful for providers that have an expensive lookup.
    """

    def __init__(self, parent_provider):
        self._real_provider = parent_provider
        # Theoretically we could use an LRUCache here
        self._cache = {}

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        needed = set()
        # If the _real_provider doesn't have a key, we cache a value of None,
        # which we then later use to realize we cannot provide a value for that
        # key.
        parent_map = {}
        cache = self._cache
        for key in keys:
            if key in cache:
                value = cache[key]
                if value is not None:
                    parent_map[key] = value
            else:
                needed.add(key)

        if needed:
            new_parents = self._real_provider.get_parent_map(needed)
            cache.update(new_parents)
            parent_map.update(new_parents)
            needed.difference_update(new_parents)
            cache.update(dict.fromkeys(needed, None))
        return parent_map
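
# For example (illustrative), the None sentinel records "already asked; the
# real provider has no entry", so repeated lookups of a missing key never hit
# the expensive provider again:
#
#   caching = CachingParentsProvider(DictParentsProvider({'a': ()}))
#   caching.get_parent_map(['a', 'ghost'])  # queries the real provider once
#   caching.get_parent_map(['ghost'])       # => {}, answered from the cache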


class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:
        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        #   cannot be reached from each other - phase 2.
        return self.heads(border_common)
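
    # For example, with the diagram at the top of this module,
    # find_lca('G', 'H') => set(['D', 'E']): D and E are the common ancestors
    # that have no descendants which are themselves common ancestors.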

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))
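
    # For example (illustrative), with the module diagram the seen sets are
    # G's ancestry {A, B, C, D, E, G} and H's ancestry {A, B, C, D, E, F, H},
    # so find_difference('G', 'H') => (set(['G']), set(['H', 'F'])).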

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            XXX: Would this API be better if we allowed multiple revisions
                 to be searched here?
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()

        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Search is done when all common searchers have completed.

        if 'graph' in debug.debug_flags:
            _mutter = trace.mutter
        else:
            def _mutter(*args, **kwargs):
                pass

        unique_searcher = self._make_breadth_first_searcher([unique_revision])
        # we know that unique_revision isn't in common_revisions
        unique_searcher.next()
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                    unique_are_common_nodes)
                ancestors.update(common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)

        unique_tips = self._remove_simple_descendants(unique_nodes,
            self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
        else:
            unique_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                        searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(ancestor_all_unique)
        if ancestor_all_unique:
            all_unique_searcher.step()

        # Stop any search tips that are already known as ancestors of the
        # unique nodes.
        common_searcher.stop_searching_any(
            common_searcher.find_seen_ancestors(ancestor_all_unique))

        total_stopped = 0
        for searcher in unique_searchers:
            total_stopped += len(searcher.stop_searching_any(
                searcher.find_seen_ancestors(ancestor_all_unique)))
        _mutter('For %s unique nodes, created %s + 1 unique searchers'
                ' (%s stopped search tips, %s common ancestors)',
                len(unique_nodes), len(unique_searchers), total_stopped,
                len(ancestor_all_unique))
        del ancestor_all_unique

        # While we still have common nodes to search
        while common_searcher._next_query:
            newly_seen_common = set(common_searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            # These nodes are common ancestors of all unique nodes
            unique_are_common_nodes = newly_seen_unique.copy()
            for searcher in unique_searchers:
                unique_are_common_nodes = unique_are_common_nodes.intersection(
                    searcher.seen)
            unique_are_common_nodes = unique_are_common_nodes.intersection(
                all_unique_searcher.seen)
            unique_are_common_nodes.update(all_unique_searcher.step())
            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if unique_are_common_nodes:
                # We have new common-to-all-unique-searchers nodes
                for searcher in unique_searchers:
                    unique_are_common_nodes.update(
                        searcher.find_seen_ancestors(unique_are_common_nodes))
                unique_are_common_nodes.update(
                    all_unique_searcher.find_seen_ancestors(unique_are_common_nodes))
                # Since these are common, we can grab another set of ancestors
                # that we have seen.
                unique_are_common_nodes.update(
                    common_searcher.find_seen_ancestors(unique_are_common_nodes))

                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                all_unique_searcher.start_searching(unique_are_common_nodes)
                common_searcher.stop_searching_any(unique_are_common_nodes)

            # Filter out searchers that don't actually search different
            # nodes. We already have the ancestry intersection for them.
            next_unique_searchers = []
            unique_search_sets = set()
            for searcher in unique_searchers:
                stopped = searcher.stop_searching_any(unique_are_common_nodes)
                will_search_set = frozenset(searcher._next_query)
                if not will_search_set:
                    _mutter('Unique searcher %s was stopped.'
                            ' (%s iterations) %d nodes stopped',
                            searcher._label,
                            searcher._iterations,
                            len(stopped))
                elif will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)
                    next_unique_searchers.append(searcher)
                else:
                    _mutter('Unique searcher %s stopped for repeated'
                            ' search of %s nodes',
                            searcher._label, len(will_search_set))
            if len(unique_searchers) != len(next_unique_searchers):
                _mutter('Collapsed %s unique searchers => %s'
                        ' at %s iterations',
                        len(unique_searchers), len(next_unique_searchers),
                        all_unique_searcher._iterations)
            unique_searchers = next_unique_searchers
        return unique_nodes.difference(common_searcher.seen)

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revisions):
        """Find revision ids of the parents of a list of revisions

        A list is returned of the same length as the input.  Each entry
        is a list of parent ids for the corresponding input revision.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        If the revision is not present (i.e. a ghost), None is used in place
        of the list of parents.

        Deprecated in bzr 1.2 - please see get_parent_map.
        """
        parents = self.get_parent_map(revisions)
        return [parents.get(r, None) for r in revisions]

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        active_searchers = searchers[:]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already.
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)

            for searcher in searchers:
                new_common.update(searcher.find_seen_ancestors(new_common))
            for searcher in searchers:
                searcher.start_searching(new_common)
            common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                assert not uncommon_nodes, ("Somehow we ended up converging"
                                            " without actually marking them as"
                                            " common."
                                            "\nStart_nodes: %s"
                                            "\nuncommon_nodes: %s"
                                            % (revisions, uncommon_nodes))
                break
        return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information.  Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return set([revision.NULL_REVISION])
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                          for c in candidate_heads)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.itervalues():
            searcher.next()
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                common_walker.next()
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in active_searchers.keys():
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(searcher.next())
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = set([ancestor])
                    for searcher in searchers.itervalues():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.itervalues():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors.
                        new_common.add(ancestor)
                        for searcher in searchers.itervalues():
                            seen_ancestors = \
                                searcher.find_seen_ancestors([ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads
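
    # For example, with the module diagram heads(['G', 'H']) =>
    # set(['G', 'H']) because neither revision is reachable from the other,
    # while heads(['D', 'G']) => set(['G']) because D is an ancestor of G.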

    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                if count_steps:
                    return lca.pop(), steps
                return lca.pop()
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.iteritems():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()
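
    # For example (illustrative), iter_topo_order(['G', 'D', 'B']) yields 'B'
    # before 'D' and 'D' before 'G', since each is a parent of the next within
    # the supplied set; parents outside the set (such as E) impose no order.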

    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return set([candidate_descendant]) == self.heads(
            [candidate_ancestor, candidate_descendant])
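
    # For example, with the module diagram is_ancestor('B', 'G') => True,
    # since heads(['B', 'G']) == set(['G']); is_ancestor('G', 'B') => False.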

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        :return: None
        """
        # Basic algorithm...
        #   A) The passed in searchers should all be on the same tips, thus
        #      they should be considered the "common" searchers.
        #   B) We find the difference between the searchers, these are the
        #      "unique" nodes for each side.
        #   C) We do a quick culling so that we only start searching from the
        #      more interesting unique nodes. (A unique ancestor is more
        #      interesting than any of its children.)
        #   D) We start searching for ancestors common to all unique nodes.
        #   E) We have the common searchers stop searching any ancestors of
        #      nodes found by (D).
        #   F) When there are no more common search tips, we stop

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        assert len(searchers) == 2, (
            "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique: # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
            self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search: # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers

        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                    searcher.seen)

        trace.mutter('Started %s unique searchers for %s unique revisions',
                     simple_unique, total_unique)

        while True: # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                    newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                # that we have seen.
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

            # Filter out searchers that don't actually search different
            # nodes. We already have the ancestry intersection for them.
            next_unique_searchers = []
            unique_search_sets = set()
            for searcher in unique_searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)
                    next_unique_searchers.append(searcher)
            unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """remove revisions which are children of other ones in the set

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :return: A set of revision_ids with the children removed
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       better.

        # This is the same as the following loop. I don't know that it is any
        # faster.
        ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
        ##     if p_ids is not None and revisions.intersection(p_ids))
        ## return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        ## descendants = {}
        ## for revision_id, parent_ids in parent_map.iteritems():
        ##   for p_id in parent_ids:
        ##       descendants.setdefault(p_id, []).append(revision_id)
        ## for revision in revisions.intersection(descendants):
        ##   simple_ancestors.difference_update(descendants[revision])
        ## return simple_ancestors
        for revision, parent_ids in parent_map.iteritems():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # remove it.
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors
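
    # For example (illustrative), given revisions set(['a', 'b']) and a
    # parent_map of {'b': ('a',)}, 'b' is discarded because its parent 'a' is
    # also in the set, leaving set(['a']).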


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value is
        a set which can be mutated, and ordering of the input is not preserved
        in the output.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return set(heads)


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return value
        is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)
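
# For example (illustrative), FrozenHeadsCache can be primed with answers that
# are already known, so those inputs never trigger a graph search at all:
#
#   heads_cache = FrozenHeadsCache(graph)
#   heads_cache.cache(['rev-a', 'rev-b'], ['rev-b'])
#   heads_cache.heads(['rev-a', 'rev-b'])  # => frozenset(['rev-b'])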


class _BreadthFirstSearcher(object):
    """Parallel search breadth-first the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = 'searching'
        else:
            prefix = 'starting'
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_result(self):
        """Get a SearchResult for the current state of this searcher.

        :return: A SearchResult for this search so far. The SearchResult is
            static - the search can be advanced and the search result will not
            be invalidated or altered.
        """
        if self._returning == 'next':
            # We have to know the current nodes' children to be able to list
            # the exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next, parents = self._do_query(self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next)
            next_query = next.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return SearchResult(self._started_keys, excludes, len(included_keys),
            included_keys)

    def step(self):
        try:
            return self.next()
        except StopIteration:
            return ()

    def next(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
            set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions:
        # we want to filter them out.
        self.seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.iteritems():
            new_found_parents = [p for p in parents if p not in self.seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen."""
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost.
            for parent_ids in parent_map.itervalues():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.  In this case, the call is a no-op.
        """
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' always references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop.
            for parents in self._current_parents.itervalues():
                for parent_id in parents:
                    if parent_id in stop_rev_references:
                        stop_rev_references[parent_id] -= 1
            stop_parents = set()
            for rev_id, refs in stop_rev_references.iteritems():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


class SearchResult(object):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = (start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
            recreate the results of this search, create a breadth first
            searcher on the same graph starting at start_keys. Then call next()
            (or next_with_ghosts()) repeatedly, and on every result, call
            stop_searching_any on any keys from the exclude_keys set. The
            revision_count value acts as a trivial cross-check - the found
            revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys