/brz/remove-bazaar : contents of breezy/vf_search.py at revision 6677.1.1

: (revision 6677.1.1)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	1	# Copyright (C) 2007-2011 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	16
6379.6.7 by Jelmer Vernooij Move importing from future until after doc string, otherwise the doc string will disappear.	17	"""Searching in versioned file repositories."""
	18
6379.6.3 by Jelmer Vernooij Use absolute_import.	19	from __future__ import absolute_import
6379.6.3 by Jelmer Vernooij Use absolute_import.	20
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	21	import itertools
	22
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	23	from . import (
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	24	debug,
	25	revision,
	26	trace,
	27	)
	28
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	29	from .graph import (
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	30	DictParentsProvider,
	31	Graph,
	32	invert_parent_map,
	33	)
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	34	from .sixish import (
	35	viewvalues,
	36	)
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	37
	38
	39	class AbstractSearchResult(object):
	40	"""The result of a search, describing a set of keys.
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	41
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	42	Search results are typically used as the 'fetch_spec' parameter when
	43	fetching revisions.
	44
	45	:seealso: AbstractSearch
	46	"""
	47
	48	def get_recipe(self):
	49	"""Return a recipe that can be used to replay this search.
	50
	51	The recipe allows reconstruction of the same results at a later date.
	52
	53	:return: A tuple of `(search_kind_str, *details)`. The details vary by
	54	kind of search result.
	55	"""
	56	raise NotImplementedError(self.get_recipe)
	57
	58	def get_network_struct(self):
	59	"""Return a tuple that can be transmitted via the HPSS protocol."""
	60	raise NotImplementedError(self.get_network_struct)
	61
	62	def get_keys(self):
	63	"""Return the keys found in this search.
	64
	65	:return: A set of keys.
	66	"""
	67	raise NotImplementedError(self.get_keys)
	68
	69	def is_empty(self):
	70	"""Return false if the search lists 1 or more revisions."""
	71	raise NotImplementedError(self.is_empty)
	72
	73	def refine(self, seen, referenced):
	74	"""Create a new search by refining this search.
	75
	76	:param seen: Revisions that have been satisfied.
	77	:param referenced: Revision references observed while satisfying some
	78	of this search.
	79	:return: A search result.
	80	"""
	81	raise NotImplementedError(self.refine)
	82
	83
	84	class AbstractSearch(object):
	85	"""A search that can be executed, producing a search result.
	86
	87	:seealso: AbstractSearchResult
	88	"""
	89
	90	def execute(self):
	91	"""Construct a network-ready search result from this search description.
	92
	93	This may take some time to search repositories, etc.
	94
	95	:return: A search result (an object that implements
	96	AbstractSearchResult's API).
	97	"""
	98	raise NotImplementedError(self.execute)
	99
	100
	101	class SearchResult(AbstractSearchResult):
	102	"""The result of a breadth first search.
	103
	104	A SearchResult provides the ability to reconstruct the search or access a
	105	set of the keys the search found.
106	"""
107
108	def __init__(self, start_keys, exclude_keys, key_count, keys):
109	"""Create a SearchResult.
110
111	:param start_keys: The keys the search started at.
112	:param exclude_keys: The keys the search excludes.
113	:param key_count: The total number of keys (from start to but not
114	including exclude).
115	:param keys: The keys the search found. Note that in future we may get
116	a SearchResult from a smart server, in which case the keys list is
117	not necessarily immediately available.
118	"""
119	self._recipe = ('search', start_keys, exclude_keys, key_count)
120	self._keys = frozenset(keys)
121
122	def __repr__(self):
123	kind, start_keys, exclude_keys, key_count = self._recipe
124	if len(start_keys) > 5:
125	start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
126	else:
127	start_keys_repr = repr(start_keys)
128	if len(exclude_keys) > 5:
129	exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
130	else:
131	exclude_keys_repr = repr(exclude_keys)
132	return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
133	kind, start_keys_repr, exclude_keys_repr, key_count)
134
135	def get_recipe(self):
136	"""Return a recipe that can be used to replay this search.
137
138	The recipe allows reconstruction of the same results at a later date
139	without knowing all the found keys. The essential elements are a list
140	of keys to start and to stop at. In order to give reproducible
141	results when ghosts are encountered by a search they are automatically
142	added to the exclude list (or else ghost filling may alter the
143	results).
144
145	:return: A tuple ('search', start_keys_set, exclude_keys_set,
146	revision_count). To recreate the results of this search, create a
147	breadth first searcher on the same graph starting at start_keys.
148	Then call next() (or next_with_ghosts()) repeatedly, and on every
149	result, call stop_searching_any on any keys from the exclude_keys
150	set. The revision_count value acts as a trivial cross-check - the
151	found revisions of the new search should have as many elements as
152	revision_count. If it does not, then additional revisions have been
153	ghosted since the search was executed the first time and the second
154	time.
155	"""
156	return self._recipe
157
158	def get_network_struct(self):
159	start_keys = ' '.join(self._recipe[1])
160	stop_keys = ' '.join(self._recipe[2])
161	count = str(self._recipe[3])
162	return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))
163
164	def get_keys(self):
165	"""Return the keys found in this search.
166
167	:return: A set of keys.
168	"""
169	return self._keys
170
171	def is_empty(self):
172	"""Return false if the search lists 1 or more revisions."""
173	return self._recipe[3] == 0
174
175	def refine(self, seen, referenced):
176	"""Create a new search by refining this search.
177
178	:param seen: Revisions that have been satisfied.
179	:param referenced: Revision references observed while satisfying some
180	of this search.
181	"""
182	start = self._recipe[1]
183	exclude = self._recipe[2]
184	count = self._recipe[3]
185	keys = self.get_keys()
186	# New heads = referenced + old heads - seen things - exclude
187	pending_refs = set(referenced)
188	pending_refs.update(start)
189	pending_refs.difference_update(seen)
190	pending_refs.difference_update(exclude)
191	# New exclude = old exclude + satisfied heads
192	seen_heads = start.intersection(seen)
193	exclude.update(seen_heads)
194	# keys gets seen removed
195	keys = keys - seen
196	# length is reduced by len(seen)
197	count -= len(seen)
198	return SearchResult(pending_refs, exclude, count, keys)
199
200
201	class PendingAncestryResult(AbstractSearchResult):
202	"""A search result that will reconstruct the ancestry for some graph heads.
203
204	Unlike SearchResult, this doesn't hold the complete search result in
205	memory, it just holds a description of how to generate it.
206	"""
207
208	def __init__(self, heads, repo):
209	"""Constructor.
210
211	:param heads: an iterable of graph heads.
212	:param repo: a repository to use to generate the ancestry for the given
213	heads.
214	"""
215	self.heads = frozenset(heads)
216	self.repo = repo
217
218	def __repr__(self):
219	if len(self.heads) > 5:
220	heads_repr = repr(list(self.heads)[:5])[:-1]
221	heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
222	else:
223	heads_repr = repr(self.heads)
224	return '<%s heads:%s repo:%r>' % (
225	self.__class__.__name__, heads_repr, self.repo)
226
227	def get_recipe(self):
228	"""Return a recipe that can be used to replay this search.
229
230	The recipe allows reconstruction of the same results at a later date.
231
232	:seealso SearchResult.get_recipe:
233
234	:return: A tuple ('proxy-search', start_keys_set, set(), -1)
235	To recreate this result, create a PendingAncestryResult with the
236	start_keys_set.
237	"""
238	return ('proxy-search', self.heads, set(), -1)
239
240	def get_network_struct(self):
241	parts = ['ancestry-of']
242	parts.extend(self.heads)
243	return parts
244
245	def get_keys(self):
246	"""See SearchResult.get_keys.
247
248	Returns all the keys for the ancestry of the heads, excluding
249	NULL_REVISION.
250	"""
251	return self._get_keys(self.repo.get_graph())
252
253	def _get_keys(self, graph):
254	NULL_REVISION = revision.NULL_REVISION
255	keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
256	if key != NULL_REVISION and parents is not None]
257	return keys
258
259	def is_empty(self):
260	"""Return false if the search lists 1 or more revisions."""
261	if revision.NULL_REVISION in self.heads:
262	return len(self.heads) == 1
263	else:
264	return len(self.heads) == 0
265
266	def refine(self, seen, referenced):
267	"""Create a new search by refining this search.
268
269	:param seen: Revisions that have been satisfied.
270	:param referenced: Revision references observed while satisfying some
271	of this search.
272	"""
273	referenced = self.heads.union(referenced)
274	return PendingAncestryResult(referenced - seen, self.repo)
275
276
277	class EmptySearchResult(AbstractSearchResult):
278	"""An empty search result."""
279
280	def is_empty(self):
281	return True
282
283
284	class EverythingResult(AbstractSearchResult):
285	"""A search result that simply requests everything in the repository."""
286
287	def __init__(self, repo):
288	self._repo = repo
289
290	def __repr__(self):
291	return '%s(%r)' % (self.__class__.__name__, self._repo)
292
293	def get_recipe(self):
294	raise NotImplementedError(self.get_recipe)
295
296	def get_network_struct(self):
297	return ('everything',)
298
299	def get_keys(self):
300	if 'evil' in debug.debug_flags:
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	301	from . import remote
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	302	if isinstance(self._repo, remote.RemoteRepository):
	303	# warn developers (not users) not to do this
	304	trace.mutter_callsite(
	305	2, "EverythingResult(RemoteRepository).get_keys() is slow.")
	306	return self._repo.all_revision_ids()
	307
	308	def is_empty(self):
	309	# It's ok for this to wrongly return False: the worst that can happen
	310	# is that RemoteStreamSource will initiate a get_stream on an empty
	311	# repository. And almost all repositories are non-empty.
	312	return False
	313
	314	def refine(self, seen, referenced):
	315	heads = set(self._repo.all_revision_ids())
	316	heads.difference_update(seen)
	317	heads.update(referenced)
	318	return PendingAncestryResult(heads, self._repo)
	319
	320
	321	class EverythingNotInOther(AbstractSearch):
	322	"""Find all revisions in that are in one repo but not the other."""
	323
	324	def __init__(self, to_repo, from_repo, find_ghosts=False):
	325	self.to_repo = to_repo
	326	self.from_repo = from_repo
	327	self.find_ghosts = find_ghosts
	328
	329	def execute(self):
	330	return self.to_repo.search_missing_revision_ids(
	331	self.from_repo, find_ghosts=self.find_ghosts)
	332
	333
	334	class NotInOtherForRevs(AbstractSearch):
	335	"""Find all revisions missing in one repo for a some specific heads."""
	336
	337	def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
	338	find_ghosts=False, limit=None):
	339	"""Constructor.
	340
	341	:param required_ids: revision IDs of heads that must be found, or else
	342	the search will fail with NoSuchRevision. All revisions in their
	343	ancestry not already in the other repository will be included in
	344	the search result.
	345	:param if_present_ids: revision IDs of heads that may be absent in the
	346	source repository. If present, then their ancestry not already
	347	found in other will be included in the search result.
	348	:param limit: maximum number of revisions to fetch
	349	"""
	350	self.to_repo = to_repo
	351	self.from_repo = from_repo
	352	self.find_ghosts = find_ghosts
	353	self.required_ids = required_ids
	354	self.if_present_ids = if_present_ids
	355	self.limit = limit
	356
	357	def __repr__(self):
	358	if len(self.required_ids) > 5:
	359	reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
	360	else:
	361	reqd_revs_repr = repr(self.required_ids)
	362	if self.if_present_ids and len(self.if_present_ids) > 5:
	363	ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
	364	else:
	365	ifp_revs_repr = repr(self.if_present_ids)
366
367	return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
368	"limit:%r>") % (
369	self.__class__.__name__, self.from_repo, self.to_repo,
370	self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
371	self.limit)
372
373	def execute(self):
374	return self.to_repo.search_missing_revision_ids(
375	self.from_repo, revision_ids=self.required_ids,
376	if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
377	limit=self.limit)
378
379
380	def search_result_from_parent_map(parent_map, missing_keys):
381	"""Transform a parent_map into SearchResult information."""
382	if not parent_map:
383	# parent_map is empty or None, simple search result
384	return [], [], 0
385	# start_set is all the keys in the cache
386	start_set = set(parent_map)
387	# result set is all the references to keys in the cache
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	388	result_parents = set(itertools.chain.from_iterable(viewvalues(parent_map)))
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	389	stop_keys = result_parents.difference(start_set)
	390	# We don't need to send ghosts back to the server as a position to
	391	# stop either.
	392	stop_keys.difference_update(missing_keys)
	393	key_count = len(parent_map)
	394	if (revision.NULL_REVISION in result_parents
	395	and revision.NULL_REVISION in missing_keys):
	396	# If we pruned NULL_REVISION from the stop_keys because it's also
	397	# in our cache of "missing" keys we need to increment our key count
	398	# by 1, because the reconsitituted SearchResult on the server will
	399	# still consider NULL_REVISION to be an included key.
	400	key_count += 1
	401	included_keys = start_set.intersection(result_parents)
	402	start_set.difference_update(included_keys)
	403	return start_set, stop_keys, key_count
	404
	405
	406	def _run_search(parent_map, heads, exclude_keys):
	407	"""Given a parent map, run a _BreadthFirstSearcher on it.
	408
	409	Start at heads, walk until you hit exclude_keys. As a further improvement,
	410	watch for any heads that you encounter while walking, which means they were
	411	not heads of the search.
	412
	413	This is mostly used to generate a succinct recipe for how to walk through
	414	most of parent_map.
	415
	416	:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
	417	"""
	418	g = Graph(DictParentsProvider(parent_map))
	419	s = g._make_breadth_first_searcher(heads)
	420	found_heads = set()
	421	while True:
	422	try:
6634.2.1 by Martin Apply 2to3 next fixer and make compatible	423	next_revs = next(s)
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	424	except StopIteration:
	425	break
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	426	for parents in viewvalues(s._current_parents):
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	427	f_heads = heads.intersection(parents)
	428	if f_heads:
	429	found_heads.update(f_heads)
	430	stop_keys = exclude_keys.intersection(next_revs)
	431	if stop_keys:
	432	s.stop_searching_any(stop_keys)
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	433	for parents in viewvalues(s._current_parents):
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	434	f_heads = heads.intersection(parents)
	435	if f_heads:
	436	found_heads.update(f_heads)
	437	return s, found_heads
	438
	439
	440	def _find_possible_heads(parent_map, tip_keys, depth):
	441	"""Walk backwards (towards children) through the parent_map.
	442
	443	This finds 'heads' that will hopefully succinctly describe our search
	444	graph.
	445	"""
	446	child_map = invert_parent_map(parent_map)
	447	heads = set()
	448	current_roots = tip_keys
	449	walked = set(current_roots)
	450	while current_roots and depth > 0:
	451	depth -= 1
	452	children = set()
	453	children_update = children.update
	454	for p in current_roots:
	455	# Is it better to pre- or post- filter the children?
	456	try:
	457	children_update(child_map[p])
	458	except KeyError:
	459	heads.add(p)
	460	# If we've seen a key before, we don't want to walk it again. Note that
	461	# 'children' stays relatively small while 'walked' grows large. So
	462	# don't use 'difference_update' here which has to walk all of 'walked'.
	463	# '.difference' is smart enough to walk only children and compare it to
	464	# walked.
	465	children = children.difference(walked)
	466	walked.update(children)
	467	current_roots = children
	468	if current_roots:
	469	# We walked to the end of depth, so these are the new tips.
	470	heads.update(current_roots)
	471	return heads
	472
	473
	474	def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
	475	depth):
	476	"""Transform a parent_map that is searching 'tip_keys' into an
	477	approximate SearchResult.
	478
	479	We should be able to generate a SearchResult from a given set of starting
	480	keys, that covers a subset of parent_map that has the last step pointing at
	481	tip_keys. This is to handle the case that really-long-searches shouldn't be
	482	started from scratch on each get_parent_map request, but we do want to
	483	filter out some of the keys that we've already seen, so we don't get
	484	information that we already know about on every request.
	485
	486	The server will validate the search (that starting at start_keys and
	487	stopping at stop_keys yields the exact key_count), so we have to be careful
	488	to give an exact recipe.
	489
	490	Basic algorithm is:
	491	1) Invert parent_map to get child_map (todo: have it cached and pass it
	492	in)
	493	2) Starting at tip_keys, walk towards children for 'depth' steps.
	494	3) At that point, we have the 'start' keys.
	495	4) Start walking parent_map from 'start' keys, counting how many keys
	496	are seen, and generating stop_keys for anything that would walk
	497	outside of the parent_map.
498
499	:param parent_map: A map from {child_id: (parent_ids,)}
500	:param missing_keys: parent_ids that we know are unavailable
501	:param tip_keys: the revision_ids that we are searching
502	:param depth: How far back to walk.
503	"""
504	if not parent_map:
505	# No search to send, because we haven't done any searching yet.
506	return [], [], 0
507	heads = _find_possible_heads(parent_map, tip_keys, depth)
508	s, found_heads = _run_search(parent_map, heads, set(tip_keys))
6341.1.5 by Jelmer Vernooij Fix get_state().	509	start_keys, exclude_keys, keys = s.get_state()
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	510	if found_heads:
	511	# Anything in found_heads are redundant start_keys, we hit them while
	512	# walking, so we can exclude them from the start list.
	513	start_keys = set(start_keys).difference(found_heads)
6341.1.5 by Jelmer Vernooij Fix get_state().	514	return start_keys, exclude_keys, len(keys)