1463
1551
return revs, ghosts
1466
class SearchResult(object):
1554
class AbstractSearchResult(object):
    """The result of a search, describing a set of keys.

    Search results are typically passed as a 'fetch_spec' parameter.

    This class only declares the interface: every method raises
    NotImplementedError and must be overridden by concrete results.

    :seealso: AbstractSearch
    """

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :return: A tuple of `(search_kind_str, *details)`.  The details vary by
            kind of search result.
        """
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        raise NotImplementedError(self.get_network_struct)

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        raise NotImplementedError(self.get_keys)

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        raise NotImplementedError(self.is_empty)

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A search result.
        """
        raise NotImplementedError(self.refine)
1599
class AbstractSearch(object):
    """A search that can be executed, producing a search result.

    This class only declares the interface; subclasses implement execute().

    :seealso: AbstractSearchResult
    """

    def execute(self):
        """Construct a network-ready search result from this search description.

        This may take some time to search repositories, etc.

        :return: A search result (an object that implements
            AbstractSearchResult's API).
        """
        raise NotImplementedError(self.execute)
1616
class SearchResult(AbstractSearchResult):
1467
1617
"""The result of a breadth first search.
1469
1619
A SearchResult provides the ability to reconstruct the search or access a
1606
1789
return PendingAncestryResult(referenced - seen, self.repo)
1792
class EmptySearchResult(AbstractSearchResult):
1793
"""An empty search result."""
1799
class EverythingResult(AbstractSearchResult):
    """A search result that simply requests everything in the repository."""

    def __init__(self, repo):
        """Remember the repository whose revisions this result stands for.

        :param repo: A repository object; it must provide all_revision_ids().
        """
        self._repo = repo

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self._repo)

    def get_recipe(self):
        """Not supported: this result has no replayable recipe."""
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return the constant 1-tuple used to request everything."""
        return ('everything',)

    def get_keys(self):
        """Return every revision id in the repository.

        When the 'evil' debug flag is set and the repository is a
        RemoteRepository, the call site is logged first, because this
        operation is slow over the network.
        """
        if 'evil' in debug.debug_flags:
            # Imported lazily, only when the debug flag is active.
            from bzrlib import remote
            if isinstance(self._repo, remote.RemoteRepository):
                # warn developers (not users) not to do this
                trace.mutter_callsite(
                    2, "EverythingResult(RemoteRepository).get_keys() is slow.")
        return self._repo.all_revision_ids()

    def is_empty(self):
        """Always report the result as non-empty."""
        # It's ok for this to wrongly return False: the worst that can happen
        # is that RemoteStreamSource will initiate a get_stream on an empty
        # repository.  And almost all repositories are non-empty.
        return False

    def refine(self, seen, referenced):
        """Create a PendingAncestryResult for what is still outstanding.

        :param seen: Revisions that have been satisfied; removed from the set
            of all revision ids.
        :param referenced: Revision references to add back as heads.
        :return: A PendingAncestryResult over the resulting heads.
        """
        heads = set(self._repo.all_revision_ids())
        heads.difference_update(seen)
        heads.update(referenced)
        return PendingAncestryResult(heads, self._repo)
1836
class EverythingNotInOther(AbstractSearch):
    """Find all revisions that are in one repo but not the other."""

    def __init__(self, to_repo, from_repo, find_ghosts=False):
        """Store the parameters of the search.

        :param to_repo: The repository whose search_missing_revision_ids()
            method performs the search.
        :param from_repo: The repository passed to that method as the other
            side of the comparison.
        :param find_ghosts: Passed through unchanged to
            search_missing_revision_ids().
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts

    def execute(self):
        """Run the search by delegating to to_repo.

        :return: Whatever to_repo.search_missing_revision_ids() returns.
        """
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, find_ghosts=self.find_ghosts)
1849
class NotInOtherForRevs(AbstractSearch):
    """Find all revisions missing in one repo for some specific heads."""

    def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
            find_ghosts=False, limit=None):
        """Constructor.

        :param to_repo: The repository whose search_missing_revision_ids()
            method performs the search.
        :param from_repo: The repository passed to that method as the other
            side of the comparison.
        :param required_ids: revision IDs of heads that must be found, or else
            the search will fail with NoSuchRevision.  All revisions in their
            ancestry not already in the other repository will be included in
            the search result.
        :param if_present_ids: revision IDs of heads that may be absent in the
            source repository.  If present, then their ancestry not already
            found in other will be included in the search result.
        :param find_ghosts: Passed through unchanged to
            search_missing_revision_ids().
        :param limit: maximum number of revisions to fetch
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts
        self.required_ids = required_ids
        self.if_present_ids = if_present_ids
        self.limit = limit

    def __repr__(self):
        # Long id collections are abbreviated to their first five entries so
        # the repr stays readable.
        if len(self.required_ids) > 5:
            reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
        else:
            reqd_revs_repr = repr(self.required_ids)
        if self.if_present_ids and len(self.if_present_ids) > 5:
            ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
        else:
            ifp_revs_repr = repr(self.if_present_ids)

        # NOTE(review): the second half of this format string and the final
        # argument were not visible in the extracted source; "limit:%r>" and
        # self.limit are reconstructed -- confirm against the real file.
        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
                "limit:%r>") % (
                self.__class__.__name__, self.from_repo, self.to_repo,
                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
                self.limit)

    def execute(self):
        """Run the search by delegating to to_repo.

        :return: Whatever to_repo.search_missing_revision_ids() returns.
        """
        # NOTE(review): the trailing limit= argument was not visible in the
        # extracted source; it is reconstructed from the constructor.
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, revision_ids=self.required_ids,
            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
            limit=self.limit)
1895
def invert_parent_map(parent_map):
    """Given a map from child => parents, create a map of parent=>children.

    :param parent_map: A mapping of {child: (parent, ...)}.
    :return: A dict of {parent: (child, ...)}.  Only keys that occur as a
        parent of something appear in the result; children are stored as
        tuples, in the order they were encountered.
    """
    child_map = {}
    # Iterate the keys and index the mapping instead of calling iteritems():
    # this behaves the same on Python 2 and Python 3 and never materialises
    # an intermediate list of pairs.
    for child in parent_map:
        for p in parent_map[child]:
            # Any given parent is likely to have only a small handful
            # of children, many will have only one. So we avoid mem overhead of
            # a list, in exchange for extra copying of tuples
            child_map[p] = child_map.get(p, ()) + (child,)
    return child_map
1910
def _find_possible_heads(parent_map, tip_keys, depth):
    """Walk backwards (towards children) through the parent_map.

    This finds 'heads' that will hopefully succinctly describe our search
    graph.

    :param parent_map: A map from child => parents.
    :param tip_keys: The keys to start walking from.
    :param depth: The maximum number of child-ward steps to take.
    :return: A set of keys: those that had no known children when visited,
        plus whatever keys were still being walked when depth ran out.
    """
    child_map = invert_parent_map(parent_map)
    heads = set()
    current_roots = tip_keys
    walked = set(current_roots)
    while current_roots and depth > 0:
        depth -= 1
        children = set()
        children_update = children.update
        for p in current_roots:
            # Is it better to pre- or post- filter the children?
            try:
                children_update(child_map[p])
            except KeyError:
                # Nothing in parent_map lists p as a parent, so p is a head.
                heads.add(p)
        # If we've seen a key before, we don't want to walk it again. Note that
        # 'children' stays relatively small while 'walked' grows large. So
        # don't use 'difference_update' here which has to walk all of 'walked'.
        # '.difference' is smart enough to walk only children and compare it to
        # walked.
        children = children.difference(walked)
        walked.update(children)
        current_roots = children
    if current_roots:
        # We walked to the end of depth, so these are the new tips.
        heads.update(current_roots)
    return heads
1944
def _run_search(parent_map, heads, exclude_keys):
    """Given a parent map, run a _BreadthFirstSearcher on it.

    Start at heads, walk until you hit exclude_keys. As a further improvement,
    watch for any heads that you encounter while walking, which means they were
    not heads of the search.

    This is mostly used to generate a succinct recipe for how to walk through
    parent_map.

    :param parent_map: A map from child => parents, wrapped in a
        DictParentsProvider for the search.
    :param heads: A set of keys to start searching from.
    :param exclude_keys: A set of keys at which the search must stop.
    :return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
    """
    g = Graph(DictParentsProvider(parent_map))
    s = g._make_breadth_first_searcher(heads)
    found_heads = set()

    def note_walked_heads():
        # Record every requested head that appears among the parents the
        # searcher currently holds: it was reached by walking.
        for parents in s._current_parents.itervalues():
            found_heads.update(heads.intersection(parents))

    while True:
        try:
            next_revs = s.next()
        except StopIteration:
            break
        note_walked_heads()
        stop_keys = exclude_keys.intersection(next_revs)
        if stop_keys:
            s.stop_searching_any(stop_keys)
    # One more pass over the parents left behind by the final step.
    note_walked_heads()
    return s, found_heads
1978
def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                          depth):
    """Transform a parent_map that is searching 'tip_keys' into an
    approximate SearchResult.

    We should be able to generate a SearchResult from a given set of starting
    keys, that covers a subset of parent_map that has the last step pointing at
    tip_keys. This is to handle the case that really-long-searches shouldn't be
    started from scratch on each get_parent_map request, but we *do* want to
    filter out some of the keys that we've already seen, so we don't get
    information that we already know about on every request.

    The server will validate the search (that starting at start_keys and
    stopping at stop_keys yields the exact key_count), so we have to be careful
    to give an exact recipe.

    Basic algorithm is:
        1) Invert parent_map to get child_map (todo: have it cached and pass it
           in)
        2) Starting at tip_keys, walk towards children for 'depth' steps.
        3) At that point, we have the 'start' keys.
        4) Start walking parent_map from 'start' keys, counting how many keys
           are seen, and generating stop_keys for anything that would walk
           outside of the parent_map.

    :param parent_map: A map from {child_id: (parent_ids,)}
    :param missing_keys: parent_ids that we know are unavailable (not used by
        this function's body)
    :param tip_keys: the revision_ids that we are searching
    :param depth: How far back to walk.
    :return: A (start_keys, exclude_keys, key_count) triple.
    """
    if not parent_map:
        # No search to send, because we haven't done any searching yet.
        # NOTE(review): the returned value was not visible in the extracted
        # source; the empty triple mirrors the normal return shape -- confirm.
        return [], [], 0
    heads = _find_possible_heads(parent_map, tip_keys, depth)
    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
    _, start_keys, exclude_keys, key_count = s.get_result().get_recipe()
    if found_heads:
        # Anything in found_heads are redundant start_keys, we hit them while
        # walking, so we can exclude them from the start list.
        start_keys = set(start_keys).difference(found_heads)
    return start_keys, exclude_keys, key_count
2021
def search_result_from_parent_map(parent_map, missing_keys):
    """Transform a parent_map into SearchResult information.

    :param parent_map: A map from child => parents (may be empty or None).
    :param missing_keys: Keys known to be unavailable; they are never sent as
        stop keys.
    :return: A (start_keys, stop_keys, key_count) triple.
    """
    if not parent_map:
        # parent_map is empty or None, simple search result
        # NOTE(review): the returned value was not visible in the extracted
        # source; the empty triple mirrors the normal return shape -- confirm.
        return [], [], 0
    # start_set is all the keys in the cache
    start_set = set(parent_map)
    # result set is all the references to keys in the cache
    result_parents = set()
    for parents in parent_map.itervalues():
        result_parents.update(parents)
    stop_keys = result_parents.difference(start_set)
    # We don't need to send ghosts back to the server as a position to
    # stop either.
    stop_keys.difference_update(missing_keys)
    key_count = len(parent_map)
    if (revision.NULL_REVISION in result_parents
            and revision.NULL_REVISION in missing_keys):
        # If we pruned NULL_REVISION from the stop_keys because it's also
        # in our cache of "missing" keys we need to increment our key count
        # by 1, because the reconstituted SearchResult on the server will
        # still consider NULL_REVISION to be an included key.
        key_count += 1
    # Keys that are referenced as a parent of another cached key are reached
    # by walking, so they are not needed as explicit start keys.
    included_keys = start_set.intersection(result_parents)
    start_set.difference_update(included_keys)
    return start_set, stop_keys, key_count
1609
2049
def collapse_linear_regions(parent_map):
1610
2050
"""Collapse regions of the graph that are 'linear'.