58
54
def __init__(self, to_repository, from_repository, last_revision=None,
59
find_ghosts=True, fetch_spec=None):
55
find_ghosts=True, fetch_spec=None):
60
56
"""Create a repo fetcher.
62
58
:param last_revision: If set, try to limit to the data this revision
64
:param fetch_spec: A SearchResult specifying which revisions to fetch.
65
If set, this overrides last_revision.
66
60
:param find_ghosts: If True search the entire history for ghosts.
68
62
# repository.fetch has the responsibility for short-circuiting
74
68
self._last_revision = last_revision
75
69
self._fetch_spec = fetch_spec
76
70
self.find_ghosts = find_ghosts
77
with self.from_repository.lock_read():
78
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
79
str(self.from_repository), str(self.from_repository._format),
80
str(self.to_repository), str(self.to_repository._format))
71
self.from_repository.lock_read()
72
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
73
self.from_repository, self.from_repository._format,
74
self.to_repository, self.to_repository._format)
78
self.from_repository.unlock()
84
81
"""Primary worker function.
94
91
# assert not missing
95
92
self.count_total = 0
96
93
self.file_ids_names = {}
97
with ui.ui_factory.nested_progress_bar() as pb:
98
pb.show_pct = pb.show_count = False
99
pb.update(gettext("Finding revisions"), 0, 2)
100
search_result = self._revids_to_fetch()
101
mutter('fetching: %s', str(search_result))
102
if search_result.is_empty():
94
pb = ui.ui_factory.nested_progress_bar()
95
pb.show_pct = pb.show_count = False
97
pb.update("Finding revisions", 0, 2)
98
search = self._revids_to_fetch()
104
pb.update(gettext("Fetching revisions"), 1, 2)
105
self._fetch_everything_for_search(search_result)
101
pb.update("Fetching revisions", 1, 2)
102
self._fetch_everything_for_search(search)
107
106
def _fetch_everything_for_search(self, search):
108
107
"""Fetch all data for the given set of revisions."""
115
114
# moment, so that it can feed the progress information back to this
117
116
if (self.from_repository._format.rich_root_data and
118
not self.to_repository._format.rich_root_data):
117
not self.to_repository._format.rich_root_data):
119
118
raise errors.IncompatibleRepositories(
120
119
self.from_repository, self.to_repository,
121
120
"different rich-root support")
122
with ui.ui_factory.nested_progress_bar() as pb:
121
pb = ui.ui_factory.nested_progress_bar()
123
123
pb.update("Get stream source")
124
124
source = self.from_repository._get_source(
125
125
self.to_repository._format)
145
148
pb.update("Finishing stream")
146
149
self.sink.finished()
148
153
def _revids_to_fetch(self):
149
154
"""Determines the exact revisions needed from self.from_repository to
150
155
install self._last_revision in self.to_repository.
152
:returns: A SearchResult of some sort. (Possibly a
153
PendingAncestryResult, EmptySearchResult, etc.)
157
If no revisions need to be fetched, then this just returns None.
155
159
if self._fetch_spec is not None:
156
# The fetch spec is already a concrete search result.
157
160
return self._fetch_spec
158
elif self._last_revision == NULL_REVISION:
159
# fetch_spec is None + last_revision is null => empty fetch.
161
mutter('fetch up to rev {%s}', self._last_revision)
162
if self._last_revision is NULL_REVISION:
160
163
# explicit limit of no revisions needed
161
return vf_search.EmptySearchResult()
162
elif self._last_revision is not None:
163
return vf_search.NotInOtherForRevs(self.to_repository,
164
self.from_repository, [
165
self._last_revision],
166
find_ghosts=self.find_ghosts).execute()
167
else: # self._last_revision is None:
168
return vf_search.EverythingNotInOther(self.to_repository,
169
self.from_repository,
170
find_ghosts=self.find_ghosts).execute()
165
return self.to_repository.search_missing_revision_ids(
166
self.from_repository, self._last_revision,
167
find_ghosts=self.find_ghosts)
169
def _parent_inventories(self, revision_ids):
170
# Find all the parent revisions referenced by the stream, but
171
# not present in the stream, and make sure we send their
173
parent_maps = self.to_repository.get_parent_map(revision_ids)
175
map(parents.update, parent_maps.itervalues())
176
parents.discard(NULL_REVISION)
177
parents.difference_update(revision_ids)
178
missing_keys = set(('inventories', rev_id) for rev_id in parents)
173
182
class Inter1and2Helper(object):
200
206
revs = list(revs)
202
208
for tree in self.source.revision_trees(revs[:100]):
203
if tree.root_inventory.revision_id is None:
204
tree.root_inventory.revision_id = tree.get_revision_id()
209
if tree.inventory.revision_id is None:
210
tree.inventory.revision_id = tree.get_revision_id()
206
212
revs = revs[100:]
208
214
def _find_root_ids(self, revs, parent_map, graph):
209
215
revision_root = {}
210
216
for tree in self.iter_rev_trees(revs):
211
root_id = tree.path2id('')
212
revision_id = tree.get_file_revision(u'')
217
revision_id = tree.inventory.root.revision
218
root_id = tree.get_root_id()
213
219
revision_root[revision_id] = root_id
214
220
# Find out which parents we don't already know root ids for
215
parents = set(viewvalues(parent_map))
216
parents.difference_update(revision_root)
217
parents.discard(NULL_REVISION)
222
for revision_parents in parent_map.itervalues():
223
parents.update(revision_parents)
224
parents.difference_update(revision_root.keys() + [NULL_REVISION])
218
225
# Limit to revisions present in the versionedfile
219
parents = graph.get_parent_map(parents)
226
parents = graph.get_parent_map(parents).keys()
220
227
for tree in self.iter_rev_trees(parents):
221
root_id = tree.path2id('')
228
root_id = tree.get_root_id()
222
229
revision_root[tree.get_revision_id()] = root_id
223
230
return revision_root
232
239
rev_order = tsort.topo_sort(parent_map)
233
240
rev_id_to_root_id = self._find_root_ids(revs, parent_map, graph)
234
241
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
236
243
# Guaranteed stable, this groups all the file id operations together
237
244
# retaining topological order within the revisions of a file id.
238
245
# File id splits and joins would invalidate this, but they don't exist
239
246
# yet, and are unlikely to in non-rich-root environments anyway.
240
247
root_id_order.sort(key=operator.itemgetter(0))
241
248
# Create a record stream containing the roots to create.
242
if len(revs) > self.known_graph_threshold:
243
graph = self.source.get_known_graph_ancestry(revs)
250
# XXX: not covered by tests, should have a flag to always run
251
# this. -- mbp 20100129
252
graph = _get_rich_root_heads_graph(self.source, revs)
244
253
new_roots_stream = _new_root_data_stream(
245
254
root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
246
255
return [('texts', new_roots_stream)]
258
def _get_rich_root_heads_graph(source_repo, revision_ids):
259
"""Get a Graph object suitable for asking heads() for new rich roots."""
260
st = static_tuple.StaticTuple
261
revision_keys = [st(r_id).intern() for r_id in revision_ids]
262
known_graph = source_repo.revisions.get_known_graph_ancestry(
264
return _mod_graph.GraphThunkIdsToKeys(known_graph)
249
267
def _new_root_data_stream(
250
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
268
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
251
269
"""Generate a texts substream of synthesised root entries.
253
271
Used in fetches that do rich-root upgrades.
255
273
:param root_keys_to_create: iterable of (root_id, rev_id) pairs describing
256
274
the root entries to create.
257
275
:param rev_id_to_root_id_map: dict of known rev_id -> root_id mappings for
265
283
root_id, rev_id = root_key
266
284
parent_keys = _parent_keys_for_root_version(
267
285
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph)
268
yield versionedfile.ChunkedContentFactory(
269
root_key, parent_keys, None, [])
286
yield versionedfile.FulltextContentFactory(
287
root_key, parent_keys, None, '')
272
290
def _parent_keys_for_root_version(
273
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
291
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
274
292
"""Get the parent keys for a given root id.
276
294
A helper function for _new_root_data_stream.
278
296
# Include direct parents of the revision, but only if they used the same
331
347
selected_ids.append(parent_id)
332
348
parent_keys = [(root_id, parent_id) for parent_id in selected_ids]
333
349
return parent_keys
336
class TargetRepoKinds(object):
337
"""An enum-like set of constants.
339
They are the possible values of FetchSpecFactory.target_repo_kinds.
342
PREEXISTING = 'preexisting'
347
class FetchSpecFactory(object):
348
"""A helper for building the best fetch spec for a sprout call.
350
Factors that go into determining the sort of fetch to perform:
351
* did the caller specify any revision IDs?
352
* did the caller specify a source branch (need to fetch its
353
heads_to_fetch(), usually the tip + tags)
354
* is there an existing target repo (don't need to refetch revs it
356
* target is stacked? (similar to pre-existing target repo: even if
357
the target itself is new don't want to refetch existing revs)
359
:ivar source_branch: the source branch if one specified, else None.
360
:ivar source_branch_stop_revision_id: fetch up to this revision of
361
source_branch, rather than its tip.
362
:ivar source_repo: the source repository if one found, else None.
363
:ivar target_repo: the target repository acquired by sprout.
364
:ivar target_repo_kind: one of the TargetRepoKinds constants.
368
self._explicit_rev_ids = set()
369
self.source_branch = None
370
self.source_branch_stop_revision_id = None
371
self.source_repo = None
372
self.target_repo = None
373
self.target_repo_kind = None
376
def add_revision_ids(self, revision_ids):
377
"""Add revision_ids to the set of revision_ids to be fetched."""
378
self._explicit_rev_ids.update(revision_ids)
380
def make_fetch_spec(self):
381
"""Build a SearchResult or PendingAncestryResult or etc."""
382
if self.target_repo_kind is None or self.source_repo is None:
383
raise AssertionError(
384
'Incomplete FetchSpecFactory: %r' % (self.__dict__,))
385
if len(self._explicit_rev_ids) == 0 and self.source_branch is None:
386
if self.limit is not None:
387
raise NotImplementedError(
388
"limit is only supported with a source branch set")
389
# Caller hasn't specified any revisions or source branch
390
if self.target_repo_kind == TargetRepoKinds.EMPTY:
391
return vf_search.EverythingResult(self.source_repo)
393
# We want everything not already in the target (or target's
395
return vf_search.EverythingNotInOther(
396
self.target_repo, self.source_repo).execute()
397
heads_to_fetch = set(self._explicit_rev_ids)
398
if self.source_branch is not None:
399
must_fetch, if_present_fetch = self.source_branch.heads_to_fetch()
400
if self.source_branch_stop_revision_id is not None:
401
# Replace the tip rev from must_fetch with the stop revision
402
# XXX: this might be wrong if the tip rev is also in the
403
# must_fetch set for other reasons (e.g. it's the tip of
404
# multiple loom threads?), but then it's pretty unclear what it
405
# should mean to specify a stop_revision in that case anyway.
406
must_fetch.discard(self.source_branch.last_revision())
407
must_fetch.add(self.source_branch_stop_revision_id)
408
heads_to_fetch.update(must_fetch)
410
if_present_fetch = set()
411
if self.target_repo_kind == TargetRepoKinds.EMPTY:
412
# PendingAncestryResult does not raise errors if a requested head
413
# is absent. Ideally it would support the
414
# required_ids/if_present_ids distinction, but in practice
415
# heads_to_fetch will almost certainly be present so this doesn't
417
all_heads = heads_to_fetch.union(if_present_fetch)
418
ret = vf_search.PendingAncestryResult(all_heads, self.source_repo)
419
if self.limit is not None:
420
graph = self.source_repo.get_graph()
421
topo_order = list(graph.iter_topo_order(ret.get_keys()))
422
result_set = topo_order[:self.limit]
423
ret = self.source_repo.revision_ids_to_search_result(
427
return vf_search.NotInOtherForRevs(self.target_repo, self.source_repo,
428
required_ids=heads_to_fetch, if_present_ids=if_present_fetch,
429
limit=self.limit).execute()