# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
from bzrlib import (
    debug,
    errors,
    revision,
    symbol_versioning,
    trace,
    tsort,
    )
from bzrlib.deprecated_graph import (node_distances, select_farthest)

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |\
#      D  E F
#      |\/| |
#      |/\|/
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']


class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return dict((k, ancestry[k]) for k in keys if k in ancestry)
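
# Illustrative usage (not part of the original module): a DictParentsProvider
# wraps a plain ancestry dict; keys absent from the dict (ghosts) are simply
# left out of the returned map.  The revision ids below are invented.
#
#   provider = DictParentsProvider({'rev2': ('rev1',), 'rev1': ()})
#   provider.get_parent_map(['rev2', 'a-ghost'])
#   => {'rev2': ('rev1',)}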


class _StackedParentsProvider(object):

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "_StackedParentsProvider(%r)" % self._parent_providers

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        for parents_provider in self._parent_providers:
            new_found = parents_provider.get_parent_map(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found
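
# Illustrative sketch (not in the original source): stacking two providers
# makes each later provider answer only for the keys the earlier ones could
# not.  The keys and values below are invented.
#
#   stacked = _StackedParentsProvider([
#       DictParentsProvider({'b': ('a',)}),
#       DictParentsProvider({'a': ()}),
#   ])
#   stacked.get_parent_map(['a', 'b', 'ghost'])
#   => {'a': (), 'b': ('a',)}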


class CachingParentsProvider(object):
    """A parents provider which will cache the revision => parents in a dict.

    This is useful for providers that have an expensive lookup.
    """

    def __init__(self, parent_provider):
        self._real_provider = parent_provider
        # Theoretically we could use an LRUCache here
        self._cache = {}

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        needed = set()
        # If the _real_provider doesn't have a key, we cache a value of None,
        # which we then later use to realize we cannot provide a value for
        # that key.
        parent_map = {}
        cache = self._cache
        for key in keys:
            if key in cache:
                value = cache[key]
                if value is not None:
                    parent_map[key] = value
            else:
                needed.add(key)

        if needed:
            new_parents = self._real_provider.get_parent_map(needed)
            cache.update(new_parents)
            parent_map.update(new_parents)
            needed.difference_update(new_parents)
            cache.update(dict.fromkeys(needed, None))
        return parent_map
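
# Illustrative sketch (not in the original source): the cache also remembers
# *misses* (as None), so repeatedly asking about a ghost never hits the real
# provider a second time.  Revision ids are invented.
#
#   caching = CachingParentsProvider(DictParentsProvider({'a': ()}))
#   caching.get_parent_map(['a', 'ghost'])  # queries the real provider
#   caching.get_parent_map(['a', 'ghost'])  # answered purely from the cache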


class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:

        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        # cannot be reached from each other - phase 2.
        return self.heads(border_common)
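
    # Illustrative example (not part of the original source), using the
    # terminology diagram at the top of this module:
    #
    #   parents = {'A': (), 'B': ('A',), 'C': ('A',),
    #              'D': ('B',), 'E': ('C',), 'F': ('C',),
    #              'G': ('D', 'E'), 'H': ('D', 'E', 'F')}
    #   graph = Graph(DictParentsProvider(parents))
    #   graph.find_lca('G', 'H') => set(['D', 'E'])
    #   graph.find_lca('D', 'E') => set(['A'])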

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))
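
    # Illustrative example (not part of the original source), with the same
    # invented graph as the find_lca example above:
    #
    #   graph.find_difference('G', 'H')
    #   => (set(['G']), set(['H', 'F']))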

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            XXX: Would this API be better if we allowed multiple revisions
                 to be searched here?
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()

        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Search is done when all common searchers have completed.

        if 'graph' in debug.debug_flags:
            _mutter = trace.mutter
        else:
            def _mutter(*args, **kwargs):
                pass

        unique_searcher = self._make_breadth_first_searcher([unique_revision])
        # we know that unique_revision isn't in common_revisions
        unique_searcher.next()
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                    unique_are_common_nodes)
                ancestors.update(common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
        if not unique_nodes:
            return unique_nodes

        unique_tips = self._remove_simple_descendants(unique_nodes,
                        self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
        else:
            unique_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                        searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(
            ancestor_all_unique)
        total_stopped = 0
        if ancestor_all_unique:
            all_unique_searcher.step()

            # Stop any search tips that are already known as ancestors of the
            # unique nodes
            common_searcher.stop_searching_any(
                common_searcher.find_seen_ancestors(ancestor_all_unique))

            for searcher in unique_searchers:
                total_stopped += len(searcher.stop_searching_any(
                    searcher.find_seen_ancestors(ancestor_all_unique)))
        _mutter('For %s unique nodes, created %s + 1 unique searchers'
                ' (%s stopped search tips, %s common ancestors)',
                len(unique_nodes), len(unique_searchers), total_stopped,
                len(ancestor_all_unique))
        del ancestor_all_unique

        # While we still have common nodes to search
        while common_searcher._next_query:
            newly_seen_common = set(common_searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            # These nodes are common ancestors of all unique nodes
            unique_are_common_nodes = newly_seen_unique.copy()
            for searcher in unique_searchers:
                unique_are_common_nodes = unique_are_common_nodes.intersection(
                    searcher.seen)
            unique_are_common_nodes = unique_are_common_nodes.intersection(
                all_unique_searcher.seen)
            unique_are_common_nodes.update(all_unique_searcher.step())
            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if unique_are_common_nodes:
                # We have new common-to-all-unique-searchers nodes
                for searcher in unique_searchers:
                    unique_are_common_nodes.update(
                        searcher.find_seen_ancestors(unique_are_common_nodes))
                unique_are_common_nodes.update(
                    all_unique_searcher.find_seen_ancestors(unique_are_common_nodes))
                # Since these are common, we can grab another set of ancestors
                unique_are_common_nodes.update(
                    common_searcher.find_seen_ancestors(unique_are_common_nodes))

                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                all_unique_searcher.start_searching(unique_are_common_nodes)
                common_searcher.stop_searching_any(unique_are_common_nodes)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    stopped = searcher.stop_searching_any(unique_are_common_nodes)
                    will_search_set = frozenset(searcher._next_query)
                    if not will_search_set:
                        _mutter('Unique searcher %s was stopped.'
                                ' (%s iterations) %d nodes stopped',
                                searcher._label,
                                searcher._iterations,
                                len(stopped))
                    elif will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes,
                        # let it continue
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                    else:
                        _mutter('Unique searcher %s stopped for repeated'
                                ' search of %s nodes',
                                searcher._label, len(will_search_set))
                if len(unique_searchers) != len(next_unique_searchers):
                    _mutter('Collapsed %s unique searchers => %s'
                            ' (%s iterations)',
                            len(unique_searchers), len(next_unique_searchers),
                            all_unique_searcher._iterations)
                unique_searchers = next_unique_searchers
        return unique_nodes.difference(common_searcher.seen)
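
    # Illustrative example (not part of the original source), with the same
    # invented graph as the find_lca example above:
    #
    #   graph.find_unique_ancestors('H', ['G'])
    #   => set(['H', 'F'])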

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revisions):
        """Find revision ids of the parents of a list of revisions

        A list is returned of the same length as the input.  Each entry
        is a list of parent ids for the corresponding input revision.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        If the revision is not present (i.e. a ghost), None is used in place
        of the list of parents.

        Deprecated in bzr 1.2 - please see get_parent_map.
        """
        parents = self.get_parent_map(revisions)
        return [parents.get(r, None) for r in revisions]

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        active_searchers = searchers[:]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)
            for searcher in searchers:
                new_common.update(searcher.find_seen_ancestors(new_common))
            for searcher in searchers:
                searcher.start_searching(new_common)
            common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    # continue
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                if uncommon_nodes:
                    raise AssertionError("Somehow we ended up converging"
                                         " without actually marking them as"
                                         " common."
                                         "\nStart_nodes: %s"
                                         "\nuncommon_nodes: %s"
                                         % (revisions, uncommon_nodes))
                return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return set([revision.NULL_REVISION])
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                          for c in candidate_heads)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.itervalues():
            searcher.next()
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                common_walker.next()
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in active_searchers.keys():
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(searcher.next())
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = set([ancestor])
                    for searcher in searchers.itervalues():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.itervalues():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors
                        new_common.add(ancestor)
                        for searcher in searchers.itervalues():
                            seen_ancestors = \
                                searcher.find_seen_ancestors([ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads
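
    # Illustrative examples (not part of the original source), using the
    # invented graph from the find_lca example above:
    #
    #   graph.heads(['G', 'H'])       => set(['G', 'H'])
    #   graph.heads(['G', 'D'])       => set(['G'])      (D is G's ancestor)
    #   graph.heads(['D', 'E', 'A'])  => set(['D', 'E'])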

    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                result = lca.pop()
                if count_steps:
                    return result, steps
                return result
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.iteritems():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending
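
    # Illustrative example (not part of the original source): walking the
    # whole ancestry of 'G' in the invented graph from the find_lca example
    # yields each (revision_id, parents) pair exactly once:
    #
    #   dict(graph.iter_ancestry(['G']))
    #   => {'G': ('D', 'E'), 'D': ('B',), 'E': ('C',),
    #       'B': ('A',), 'C': ('A',), 'A': ()}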

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()
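
    # Illustrative example (not part of the original source): one valid
    # topological order for part of the invented graph above is
    #
    #   list(graph.iter_topo_order(['A', 'B', 'D', 'G']))
    #   => ['A', 'B', 'D', 'G']   (parents always precede their children)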

    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return set([candidate_descendant]) == self.heads(
            [candidate_ancestor, candidate_descendant])
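
    # Illustrative example (not part of the original source), with the same
    # invented graph:
    #
    #   graph.is_ancestor('A', 'G') => True
    #   graph.is_ancestor('G', 'A') => False
    #   graph.is_ancestor('F', 'G') => False  (F and G are unrelated)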

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        """
        # A) The passed in searchers should all be on the same tips, thus
        #    they should be considered the "common" searchers.
        # B) We find the difference between the searchers, these are the
        #    "unique" nodes for each side.
        # C) We do a quick culling so that we only start searching from the
        #    more interesting unique nodes. (A unique ancestor is more
        #    interesting than any of its children.)
        # D) We start searching for ancestors common to all unique nodes.
        # E) We have the common searchers stop searching any ancestors of
        #    nodes found by (D)
        # F) When there are no more common search tips, we stop

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        if len(searchers) != 2:
            raise NotImplementedError(
                "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique: # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
                    self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search: # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers
        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                    searcher.seen)

        trace.mutter('Started %s unique searchers for %s unique revisions',
                     simple_unique, total_unique)

        while True: # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                    newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    will_search_set = frozenset(searcher._next_query)
                    if will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes,
                        # let it continue
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """Remove revisions which are children of other ones in the set

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :return: A set of revision_ids with the children removed
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       better.

        # This is the same as the following loop. I don't know that it is any
        # faster.
        ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
        ##     if p_ids is not None and revisions.intersection(p_ids))
        ## return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        ## descendants = {}
        ## for revision_id, parent_ids in parent_map.iteritems():
        ##   for p_id in parent_ids:
        ##       descendants.setdefault(p_id, []).append(revision_id)
        ## for revision in revisions.intersection(descendants):
        ##   simple_ancestors.difference_update(descendants[revision])
        ## return simple_ancestors
        for revision, parent_ids in parent_map.iteritems():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # remove it
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value
        is a set which can be mutated, and ordering of the input is not
        preserved in the result.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return heads
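
# Illustrative usage (not part of the original source): the cache key is the
# frozenset of the query, so argument order does not matter, and the cached
# set is copied on every hit so callers may mutate their result freely.
#
#   cache = HeadsCache(graph)
#   cache.heads(['G', 'H'])   # computed via graph.heads()
#   cache.heads(['H', 'G'])   # same frozenset key: answered from the cache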


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return
        value is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)
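
# Illustrative usage (not part of the original source): FrozenHeadsCache skips
# the defensive copy that HeadsCache makes, so it is cheaper when callers
# promise not to mutate the result; known answers can also be primed directly.
#
#   cache = FrozenHeadsCache(graph)
#   cache.cache(['G', 'H'], ['G', 'H'])   # prime a known value
#   cache.heads(['H', 'G'])               # => frozenset(['G', 'H'])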


class _BreadthFirstSearcher(object):
    """Parallel search breadth-first the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = "searching"
        else:
            prefix = "starting"
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_result(self):
        """Get a SearchResult for the current state of this searcher.

        :return: A SearchResult for this search so far. The SearchResult is
            static - the search can be advanced and the search result will not
            be invalidated or altered.
        """
        if self._returning == 'next':
            # We have to know the current nodes' children to be able to list
            # the exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next, parents = self._do_query(self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next)
            next_query = next.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return SearchResult(self._started_keys, excludes, len(included_keys),
                            included_keys)

    def step(self):
        try:
            return self.next()
        except StopIteration:
            return ()

    def next(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
           set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions:
        # we want to filter them out.
        self.seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.iteritems():
            new_found_parents = [p for p in parents if p not in self.seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen."""
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost
            for parent_ids in parent_map.itervalues():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(all_parents).difference(
                seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """
        Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.  In this case, the call is a no-op.
        """
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' always references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop
            for parents in self._current_parents.itervalues():
                for parent_id in parents:
                    if parent_id in stop_rev_references:
                        stop_rev_references[parent_id] -= 1
            stop_parents = set()
            for rev_id, refs in stop_rev_references.iteritems():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


class SearchResult(object):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including the exclude keys).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = (start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
            recreate the results of this search, create a breadth first
            searcher on the same graph starting at start_keys. Then call next()
            (or next_with_ghosts()) repeatedly, and on every result, call
            stop_searching_any on any keys from the exclude_keys set. The
            revision_count value acts as a trivial cross-check - the found
            revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys