508
484
def __init__(self, revisions, parents_provider):
509
self._start = set(revisions)
510
self._search_revisions = None
511
self.seen = set(revisions)
486
self._next_query = set(revisions)
488
self._started_keys = set(self._next_query)
489
self._stopped_keys = set()
512
490
self._parents_provider = parents_provider
491
self._returning = 'next_with_ghosts'
492
self._current_present = set()
493
self._current_ghosts = set()
494
self._current_parents = {}
514
496
def __repr__(self):
515
if self._search_revisions is not None:
516
search = 'searching=%r' % (list(self._search_revisions),)
518
search = 'starting=%r' % (list(self._start),)
519
return ('_BreadthFirstSearcher(%s,'
520
' seen=%r)' % (search, list(self.seen)))
501
search = '%s=%r' % (prefix, list(self._next_query))
502
return ('_BreadthFirstSearcher(iterations=%d, %s,'
503
' seen=%r)' % (self._iterations, search, list(self.seen)))
505
def get_result(self):
506
"""Get a SearchResult for the current state of this searcher.
508
:return: A SearchResult for this search so far. The SearchResult is
509
static - the search can be advanced and the search result will not
510
be invalidated or altered.
512
if self._returning == 'next':
513
# We have to know the current nodes children to be able to list the
514
# exclude keys for them. However, while we could have a second
515
# look-ahead result buffer and shuffle things around, this method
516
# is typically only called once per search - when memoising the
517
# results of the search.
518
found, ghosts, next, parents = self._do_query(self._next_query)
519
# pretend we didn't query: perhaps we should tweak _do_query to be
520
# entirely stateless?
521
self.seen.difference_update(next)
522
next_query = next.union(ghosts)
524
next_query = self._next_query
525
excludes = self._stopped_keys.union(next_query)
526
included_keys = self.seen.difference(excludes)
527
return SearchResult(self._started_keys, excludes, len(included_keys),
523
531
"""Return the next ancestors of this revision.
525
533
Ancestors are returned in the order they are seen in a breadth-first
526
traversal. No ancestor will be returned more than once.
534
traversal. No ancestor will be returned more than once. Ancestors are
535
returned before their parentage is queried, so ghosts and missing
536
revisions (including the start revisions) are included in the result.
537
This can save a round trip in LCA style calculation by allowing
538
convergence to be detected without reading the data for the revision
539
the convergence occurs on.
541
:return: A set of revision_ids.
528
if self._search_revisions is None:
529
self._search_revisions = self._start
543
if self._returning != 'next':
544
# switch to returning the query, not the results.
545
self._returning = 'next'
546
self._iterations += 1
531
new_search_revisions = set()
532
parent_map = self._parents_provider.get_parent_map(
533
self._search_revisions)
534
for parents in parent_map.itervalues():
535
new_search_revisions.update(p for p in parents if
537
self._search_revisions = new_search_revisions
538
if len(self._search_revisions) == 0:
539
raise StopIteration()
540
self.seen.update(self._search_revisions)
541
return self._search_revisions
549
if len(self._next_query) == 0:
550
raise StopIteration()
551
# We have seen what we're querying at this point as we are returning
552
# the query, not the results.
553
self.seen.update(self._next_query)
554
return self._next_query
556
def next_with_ghosts(self):
557
"""Return the next found ancestors, with ghosts split out.
559
Ancestors are returned in the order they are seen in a breadth-first
560
traversal. No ancestor will be returned more than once. Ancestors are
561
returned only after asking for their parents, which allows us to detect
562
which revisions are ghosts and which are not.
564
:return: A tuple with (present ancestors, ghost ancestors) sets.
566
if self._returning != 'next_with_ghosts':
567
# switch to returning the results, not the current query.
568
self._returning = 'next_with_ghosts'
570
if len(self._next_query) == 0:
571
raise StopIteration()
573
return self._current_present, self._current_ghosts
576
"""Advance the search.
578
Updates self.seen, self._next_query, self._current_present,
579
self._current_ghosts, self._current_parents and self._iterations.
581
self._iterations += 1
582
found, ghosts, next, parents = self._do_query(self._next_query)
583
self._current_present = found
584
self._current_ghosts = ghosts
585
self._next_query = next
586
self._current_parents = parents
587
# ghosts are implicit stop points, otherwise the search cannot be
588
# repeated when ghosts are filled.
589
self._stopped_keys.update(ghosts)
591
def _do_query(self, revisions):
592
"""Query for revisions.
594
Adds revisions to the seen set.
596
:param revisions: Revisions to query.
597
:return: A tuple: (set(found_revisions), set(ghost_revisions),
598
set(parents_of_found_revisions), dict(found_revisions:parents)).
600
found_parents = set()
601
parents_of_found = set()
602
# revisions may contain nodes that point to other nodes in revisions:
603
# we want to filter them out.
604
self.seen.update(revisions)
605
parent_map = self._parents_provider.get_parent_map(revisions)
606
for rev_id, parents in parent_map.iteritems():
607
found_parents.add(rev_id)
608
parents_of_found.update(p for p in parents if p not in self.seen)
609
ghost_parents = revisions - found_parents
610
return found_parents, ghost_parents, parents_of_found, parent_map
543
612
def __iter__(self):
562
631
None of the specified revisions are required to be present in the
563
632
search list. In this case, the call is a no-op.
565
stopped = self._search_revisions.intersection(revisions)
566
self._search_revisions = self._search_revisions.difference(revisions)
634
revisions = frozenset(revisions)
635
if self._returning == 'next':
636
stopped = self._next_query.intersection(revisions)
637
self._next_query = self._next_query.difference(revisions)
639
stopped_present = self._current_present.intersection(revisions)
640
stopped = stopped_present.union(
641
self._current_ghosts.intersection(revisions))
642
self._current_present.difference_update(stopped)
643
self._current_ghosts.difference_update(stopped)
644
# stopping 'x' should stop returning parents of 'x', but
645
# not if 'y' always references those same parents
646
stop_rev_references = {}
647
for rev in stopped_present:
648
for parent_id in self._current_parents[rev]:
649
if parent_id not in stop_rev_references:
650
stop_rev_references[parent_id] = 0
651
stop_rev_references[parent_id] += 1
652
# if only the stopped revisions reference it, the ref count will be
654
for parents in self._current_parents.itervalues():
655
for parent_id in parents:
657
stop_rev_references[parent_id] -= 1
661
for rev_id, refs in stop_rev_references.iteritems():
663
stop_parents.add(rev_id)
664
self._next_query.difference_update(stop_parents)
665
self._stopped_keys.update(stopped)
569
668
def start_searching(self, revisions):
570
if self._search_revisions is None:
571
self._start = set(revisions)
669
"""Add revisions to the search.
671
The parents of revisions will be returned from the next call to next()
672
or next_with_ghosts(). If next_with_ghosts was the most recently used
673
next* call then the return value is the result of looking up the
674
ghost/not ghost status of revisions. (A tuple (present, ghosted)).
676
revisions = frozenset(revisions)
677
self._started_keys.update(revisions)
678
new_revisions = revisions.difference(self.seen)
679
revs, ghosts, query, parents = self._do_query(revisions)
680
self._stopped_keys.update(ghosts)
681
if self._returning == 'next':
682
self._next_query.update(new_revisions)
573
self._search_revisions.update(revisions.difference(self.seen))
574
self.seen.update(revisions)
684
# perform a query on revisions
685
self._current_present.update(revs)
686
self._current_ghosts.update(ghosts)
687
self._next_query.update(query)
688
self._current_parents.update(parents)
692
class SearchResult(object):
693
"""The result of a breadth first search.
695
A SearchResult provides the ability to reconstruct the search or access a
696
set of the keys the search found.
699
def __init__(self, start_keys, exclude_keys, key_count, keys):
700
"""Create a SearchResult.
702
:param start_keys: The keys the search started at.
703
:param exclude_keys: The keys the search excludes.
704
:param key_count: The total number of keys (from start to but not
706
:param keys: The keys the search found. Note that in future we may get
707
a SearchResult from a smart server, in which case the keys list is
708
not necessarily immediately available.
710
self._recipe = (start_keys, exclude_keys, key_count)
711
self._keys = frozenset(keys)
713
def get_recipe(self):
714
"""Return a recipe that can be used to replay this search.
716
The recipe allows reconstruction of the same results at a later date
717
without knowing all the found keys. The essential elements are a list
718
of keys to start and and to stop at. In order to give reproducible
719
results when ghosts are encountered by a search they are automatically
720
added to the exclude list (or else ghost filling may alter the
723
:return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
724
recreate the results of this search, create a breadth first
725
searcher on the same graph starting at start_keys. Then call next()
726
(or next_with_ghosts()) repeatedly, and on every result, call
727
stop_searching_any on any keys from the exclude_keys set. The
728
revision_count value acts as a trivial cross-check - the found
729
revisions of the new search should have as many elements as
730
revision_count. If it does not, then additional revisions have been
731
ghosted since the search was executed the first time and the second
737
"""Return the keys found in this search.
739
:return: A set of keys.