# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Searching in versioned file repositories."""

from __future__ import absolute_import

# NOTE(review): the module-level imports appear to have been lost from this
# chunk; the names used below (debug, revision, trace, remote, itertools,
# viewvalues, Graph, DictParentsProvider, invert_parent_map) must be brought
# into scope here — restore them from version control.
class AbstractSearchResult(object):
    """The result of a search, describing a set of keys.

    Search results are typically used as the 'fetch_spec' parameter when
    fetching revisions.

    :seealso: AbstractSearch
    """

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :return: A tuple of `(search_kind_str, *details)`.  The details vary by
            kind of search result.
        """
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        raise NotImplementedError(self.get_network_struct)

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        raise NotImplementedError(self.get_keys)

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        raise NotImplementedError(self.is_empty)

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        :return: A search result.
        """
        raise NotImplementedError(self.refine)

class AbstractSearch(object):
    """A search that can be executed, producing a search result.

    :seealso: AbstractSearchResult
    """

    def execute(self):
        """Construct a network-ready search result from this search description.

        This may take some time to search repositories, etc.

        :return: A search result (an object that implements
            AbstractSearchResult's API).
        """
        raise NotImplementedError(self.execute)

class SearchResult(AbstractSearchResult):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        # The recipe is all a caller needs to replay this search later.
        self._recipe = ('search', start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def __repr__(self):
        kind, start_keys, exclude_keys, key_count = self._recipe
        # Abbreviate long key sets so reprs stay readable in logs.
        if len(start_keys) > 5:
            start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
        else:
            start_keys_repr = repr(start_keys)
        if len(exclude_keys) > 5:
            exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
        else:
            exclude_keys_repr = repr(exclude_keys)
        return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
            kind, start_keys_repr, exclude_keys_repr, key_count)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple ('search', start_keys_set, exclude_keys_set,
            revision_count). To recreate the results of this search, create a
            breadth first searcher on the same graph starting at start_keys.
            Then call next() (or next_with_ghosts()) repeatedly, and on every
            result, call stop_searching_any on any keys from the exclude_keys
            set. The revision_count value acts as a trivial cross-check - the
            found revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        start_keys = ' '.join(self._recipe[1])
        stop_keys = ' '.join(self._recipe[2])
        count = str(self._recipe[3])
        return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        return self._recipe[3] == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        """
        start = self._recipe[1]
        exclude = self._recipe[2]
        count = self._recipe[3]
        keys = self.get_keys()
        # New heads = referenced + old heads - seen things - exclude
        pending_refs = set(referenced)
        pending_refs.update(start)
        pending_refs.difference_update(seen)
        pending_refs.difference_update(exclude)
        # New exclude = old exclude + satisfied heads
        seen_heads = start.intersection(seen)
        exclude.update(seen_heads)
        # keys gets seen removed
        keys = keys - seen
        # length is reduced by len(seen)
        count -= len(seen)
        return SearchResult(pending_refs, exclude, count, keys)

class PendingAncestryResult(AbstractSearchResult):
    """A search result that will reconstruct the ancestry for some graph heads.

    Unlike SearchResult, this doesn't hold the complete search result in
    memory, it just holds a description of how to generate it.
    """

    def __init__(self, heads, repo):
        """Constructor.

        :param heads: an iterable of graph heads.
        :param repo: a repository to use to generate the ancestry for the given
            heads.
        """
        self.heads = frozenset(heads)
        self.repo = repo

    def __repr__(self):
        # Abbreviate long head sets so reprs stay readable in logs.
        if len(self.heads) > 5:
            heads_repr = repr(list(self.heads)[:5])[:-1]
            heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
        else:
            heads_repr = repr(self.heads)
        return '<%s heads:%s repo:%r>' % (
            self.__class__.__name__, heads_repr, self.repo)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :seealso SearchResult.get_recipe:

        :return: A tuple ('proxy-search', start_keys_set, set(), -1)
            To recreate this result, create a PendingAncestryResult with the
            start_keys_set.
        """
        return ('proxy-search', self.heads, set(), -1)

    def get_network_struct(self):
        """Return a struct that can be transmitted via the HPSS protocol."""
        parts = ['ancestry-of']
        parts.extend(self.heads)
        return parts

    def get_keys(self):
        """See SearchResult.get_keys.

        Returns all the keys for the ancestry of the heads, excluding
        NULL_REVISION.
        """
        return self._get_keys(self.repo.get_graph())

    def _get_keys(self, graph):
        # Hoist the attribute lookup out of the comprehension's filter.
        NULL_REVISION = revision.NULL_REVISION
        keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
                if key != NULL_REVISION and parents is not None]
        return keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        if revision.NULL_REVISION in self.heads:
            # NULL_REVISION contributes no real revisions to the result.
            return len(self.heads) == 1
        else:
            return len(self.heads) == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        """
        referenced = self.heads.union(referenced)
        return PendingAncestryResult(referenced - seen, self.repo)

class EmptySearchResult(AbstractSearchResult):
    """An empty search result."""

    def is_empty(self):
        # NOTE(review): the original method bodies of this class were lost in
        # this chunk; is_empty is reconstructed from the class's contract (an
        # empty result by definition lists no revisions) — restore the rest
        # from version control.
        return True

class EverythingResult(AbstractSearchResult):
    """A search result that simply requests everything in the repository."""

    def __init__(self, repo):
        self._repo = repo

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self._repo)

    def get_recipe(self):
        # There is no recipe that replays "everything"; callers must use the
        # network struct instead.
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        return ('everything',)

    def get_keys(self):
        if 'evil' in debug.debug_flags:
            # NOTE(review): 'remote' must be in scope here (a module-level or
            # function-local import was lost in this chunk) — confirm.
            if isinstance(self._repo, remote.RemoteRepository):
                # warn developers (not users) not to do this
                trace.mutter_callsite(
                    2, "EverythingResult(RemoteRepository).get_keys() is slow.")
        return self._repo.all_revision_ids()

    def is_empty(self):
        # It's ok for this to wrongly return False: the worst that can happen
        # is that RemoteStreamSource will initiate a get_stream on an empty
        # repository. And almost all repositories are non-empty.
        return False

    def refine(self, seen, referenced):
        heads = set(self._repo.all_revision_ids())
        heads.difference_update(seen)
        heads.update(referenced)
        return PendingAncestryResult(heads, self._repo)

class EverythingNotInOther(AbstractSearch):
    """Find all revisions in that are in one repo but not the other."""

    def __init__(self, to_repo, from_repo, find_ghosts=False):
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts

    def execute(self):
        """See AbstractSearch.execute."""
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, find_ghosts=self.find_ghosts)

class NotInOtherForRevs(AbstractSearch):
    """Find all revisions missing in one repo for a some specific heads."""

    def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
            find_ghosts=False, limit=None):
        """Constructor.

        :param required_ids: revision IDs of heads that must be found, or else
            the search will fail with NoSuchRevision. All revisions in their
            ancestry not already in the other repository will be included in
            the search result.
        :param if_present_ids: revision IDs of heads that may be absent in the
            source repository. If present, then their ancestry not already
            found in other will be included in the search result.
        :param limit: maximum number of revisions to fetch
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts
        self.required_ids = required_ids
        self.if_present_ids = if_present_ids
        self.limit = limit

    def __repr__(self):
        # Abbreviate long revision-id lists so reprs stay readable in logs.
        if len(self.required_ids) > 5:
            reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
        else:
            reqd_revs_repr = repr(self.required_ids)
        if self.if_present_ids and len(self.if_present_ids) > 5:
            ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
        else:
            ifp_revs_repr = repr(self.if_present_ids)
        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
                " limit:%r>") % (
                self.__class__.__name__, self.from_repo, self.to_repo,
                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
                self.limit)

    def execute(self):
        """See AbstractSearch.execute."""
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, revision_ids=self.required_ids,
            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
            limit=self.limit)

def search_result_from_parent_map(parent_map, missing_keys):
    """Transform a parent_map into SearchResult information.

    :param parent_map: A map of {child_id: (parent_ids,)}.
    :param missing_keys: parent_ids that are known to be unavailable (ghosts).
    :return: A (start_set, stop_keys, key_count) tuple describing an
        equivalent SearchResult recipe.
    """
    if not parent_map:
        # parent_map is empty or None, simple search result
        return [], [], 0
    # start_set is all the keys in the cache
    start_set = set(parent_map)
    # result set is all the references to keys in the cache
    result_parents = set(itertools.chain.from_iterable(viewvalues(parent_map)))
    stop_keys = result_parents.difference(start_set)
    # We don't need to send ghosts back to the server as a position to
    # stop either.
    stop_keys.difference_update(missing_keys)
    key_count = len(parent_map)
    if (revision.NULL_REVISION in result_parents
        and revision.NULL_REVISION in missing_keys):
        # If we pruned NULL_REVISION from the stop_keys because it's also
        # in our cache of "missing" keys we need to increment our key count
        # by 1, because the reconsitituted SearchResult on the server will
        # still consider NULL_REVISION to be an included key.
        key_count += 1
    # Keys that are both a start and a referenced parent are interior to the
    # cached graph, so they are not search starting positions.
    included_keys = start_set.intersection(result_parents)
    start_set.difference_update(included_keys)
    return start_set, stop_keys, key_count

def _run_search(parent_map, heads, exclude_keys):
    """Given a parent map, run a _BreadthFirstSearcher on it.

    Start at heads, walk until you hit exclude_keys. As a further improvement,
    watch for any heads that you encounter while walking, which means they were
    not heads of the search.

    This is mostly used to generate a succinct recipe for how to walk through
    most of the history.

    :return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
    """
    g = Graph(DictParentsProvider(parent_map))
    s = g._make_breadth_first_searcher(heads)
    found_heads = set()
    # NOTE(review): the loop scaffolding here was reconstructed around the
    # surviving StopIteration handler and stop_searching_any call — confirm
    # against version control.
    while True:
        try:
            next_revs = next(s)
        except StopIteration:
            break
        # Any supplied head reached via another head's parents was not a true
        # head of this search.
        for parents in viewvalues(s._current_parents):
            f_heads = heads.intersection(parents)
            if f_heads:
                found_heads.update(f_heads)
        stop_keys = exclude_keys.intersection(next_revs)
        if stop_keys:
            s.stop_searching_any(stop_keys)
    # Catch heads discovered on the final step of the walk.
    for parents in viewvalues(s._current_parents):
        f_heads = heads.intersection(parents)
        if f_heads:
            found_heads.update(f_heads)
    return s, found_heads

def _find_possible_heads(parent_map, tip_keys, depth):
    """Walk backwards (towards children) through the parent_map.

    This finds 'heads' that will hopefully succinctly describe our search
    graph.

    :param parent_map: A map of {child_id: (parent_ids,)}.
    :param tip_keys: The keys to start walking from.
    :param depth: How many steps towards children to take.
    :return: A set of candidate head keys.
    """
    child_map = invert_parent_map(parent_map)
    heads = set()
    current_roots = tip_keys
    walked = set(current_roots)
    while current_roots and depth > 0:
        depth -= 1
        children = set()
        children_update = children.update
        for p in current_roots:
            # Is it better to pre- or post- filter the children?
            try:
                children_update(child_map[p])
            except KeyError:
                # p has no children in the map, so it is a head.
                heads.add(p)
        # If we've seen a key before, we don't want to walk it again. Note that
        # 'children' stays relatively small while 'walked' grows large. So
        # don't use 'difference_update' here which has to walk all of 'walked'.
        # '.difference' is smart enough to walk only children and compare it to
        # walked.
        children = children.difference(walked)
        walked.update(children)
        current_roots = children
    if current_roots:
        # We walked to the end of depth, so these are the new tips.
        heads.update(current_roots)
    return heads

def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                          depth):
    """Transform a parent_map that is searching 'tip_keys' into an
    approximate SearchResult.

    We should be able to generate a SearchResult from a given set of starting
    keys, that covers a subset of parent_map that has the last step pointing at
    tip_keys. This is to handle the case that really-long-searches shouldn't be
    started from scratch on each get_parent_map request, but we *do* want to
    filter out some of the keys that we've already seen, so we don't get
    information that we already know about on every request.

    The server will validate the search (that starting at start_keys and
    stopping at stop_keys yields the exact key_count), so we have to be careful
    to give an exact recipe.

    Basic algorithm is:
        1) Invert parent_map to get child_map (todo: have it cached and pass it
           in)
        2) Starting at tip_keys, walk towards children for 'depth' steps.
        3) At that point, we have the 'start' keys.
        4) Start walking parent_map from 'start' keys, counting how many keys
           are seen, and generating stop_keys for anything that would walk
           outside of the parent_map.

    :param parent_map: A map from {child_id: (parent_ids,)}
    :param missing_keys: parent_ids that we know are unavailable
    :param tip_keys: the revision_ids that we are searching
    :param depth: How far back to walk.
    """
    if not parent_map:
        # No search to send, because we haven't done any searching yet.
        return [], [], 0
    heads = _find_possible_heads(parent_map, tip_keys, depth)
    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
    start_keys, exclude_keys, keys = s.get_state()
    if found_heads:
        # Anything in found_heads are redundant start_keys, we hit them while
        # walking, so we can exclude them from the start list.
        start_keys = set(start_keys).difference(found_heads)
    return start_keys, exclude_keys, len(keys)