53
52
def __init__(self, to_repository, from_repository, last_revision=None,
54
find_ghosts=True, fetch_spec=None):
53
pb=None, find_ghosts=True, fetch_spec=None):
55
54
"""Create a repo fetcher.
57
56
:param last_revision: If set, try to limit to the data this revision
59
:param fetch_spec: A SearchResult specifying which revisions to fetch.
60
If set, this overrides last_revision.
61
58
:param find_ghosts: If True search the entire history for ghosts.
59
:param pb: ProgressBar object to use; deprecated and ignored.
60
This method will just create one on top of the stack.
63
symbol_versioning.warn(
64
symbol_versioning.deprecated_in((1, 14, 0))
65
% "pb parameter to RepoFetcher.__init__")
66
# and for simplicity it is in fact ignored
63
67
# repository.fetch has the responsibility for short-circuiting
64
68
# attempts to copy between a repository and itself.
65
69
self.to_repository = to_repository
69
73
self._last_revision = last_revision
70
74
self._fetch_spec = fetch_spec
71
75
self.find_ghosts = find_ghosts
72
with self.from_repository.lock_read():
73
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
74
str(self.from_repository), str(self.from_repository._format),
75
str(self.to_repository), str(self.to_repository._format))
76
self.from_repository.lock_read()
77
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
78
self.from_repository, self.from_repository._format,
79
self.to_repository, self.to_repository._format)
83
self.from_repository.unlock()
79
86
"""Primary worker function.
89
96
# assert not missing
90
97
self.count_total = 0
91
98
self.file_ids_names = {}
92
with ui.ui_factory.nested_progress_bar() as pb:
93
pb.show_pct = pb.show_count = False
94
pb.update(gettext("Finding revisions"), 0, 2)
95
search_result = self._revids_to_fetch()
96
mutter('fetching: %s', str(search_result))
97
if search_result.is_empty():
99
pb = bzrlib.ui.ui_factory.nested_progress_bar()
100
pb.show_pct = pb.show_count = False
102
pb.update("Finding revisions", 0, 2)
103
search = self._revids_to_fetch()
99
pb.update(gettext("Fetching revisions"), 1, 2)
100
self._fetch_everything_for_search(search_result)
106
pb.update("Fetching revisions", 1, 2)
107
self._fetch_everything_for_search(search)
102
111
def _fetch_everything_for_search(self, search):
103
112
"""Fetch all data for the given set of revisions."""
110
119
# moment, so that it can feed the progress information back to this
112
121
if (self.from_repository._format.rich_root_data and
113
not self.to_repository._format.rich_root_data):
122
not self.to_repository._format.rich_root_data):
114
123
raise errors.IncompatibleRepositories(
115
124
self.from_repository, self.to_repository,
116
125
"different rich-root support")
117
with ui.ui_factory.nested_progress_bar() as pb:
126
pb = bzrlib.ui.ui_factory.nested_progress_bar()
118
128
pb.update("Get stream source")
119
129
source = self.from_repository._get_source(
120
130
self.to_repository._format)
140
153
pb.update("Finishing stream")
141
154
self.sink.finished()
143
158
def _revids_to_fetch(self):
144
159
"""Determines the exact revisions needed from self.from_repository to
145
160
install self._last_revision in self.to_repository.
147
:returns: A SearchResult of some sort. (Possibly a
148
PendingAncestryResult, EmptySearchResult, etc.)
162
If no revisions need to be fetched, then this just returns None.
150
164
if self._fetch_spec is not None:
151
# The fetch spec is already a concrete search result.
152
165
return self._fetch_spec
153
elif self._last_revision == NULL_REVISION:
154
# fetch_spec is None + last_revision is null => empty fetch.
166
mutter('fetch up to rev {%s}', self._last_revision)
167
if self._last_revision is NULL_REVISION:
155
168
# explicit limit of no revisions needed
156
return vf_search.EmptySearchResult()
157
elif self._last_revision is not None:
158
return vf_search.NotInOtherForRevs(self.to_repository,
159
self.from_repository, [
160
self._last_revision],
161
find_ghosts=self.find_ghosts).execute()
162
else: # self._last_revision is None:
163
return vf_search.EverythingNotInOther(self.to_repository,
164
self.from_repository,
165
find_ghosts=self.find_ghosts).execute()
170
return self.to_repository.search_missing_revision_ids(
171
self.from_repository, self._last_revision,
172
find_ghosts=self.find_ghosts)
174
def _parent_inventories(self, revision_ids):
175
# Find all the parent revisions referenced by the stream, but
176
# not present in the stream, and make sure we send their
178
parent_maps = self.to_repository.get_parent_map(revision_ids)
180
map(parents.update, parent_maps.itervalues())
181
parents.discard(NULL_REVISION)
182
parents.difference_update(revision_ids)
183
missing_keys = set(('inventories', rev_id) for rev_id in parents)
168
187
class Inter1and2Helper(object):
195
211
revs = list(revs)
197
213
for tree in self.source.revision_trees(revs[:100]):
198
if tree.root_inventory.revision_id is None:
199
tree.root_inventory.revision_id = tree.get_revision_id()
214
if tree.inventory.revision_id is None:
215
tree.inventory.revision_id = tree.get_revision_id()
201
217
revs = revs[100:]
203
219
def _find_root_ids(self, revs, parent_map, graph):
204
220
revision_root = {}
205
221
for tree in self.iter_rev_trees(revs):
206
root_id = tree.path2id('')
207
revision_id = tree.get_file_revision(u'')
222
revision_id = tree.inventory.root.revision
223
root_id = tree.get_root_id()
208
224
revision_root[revision_id] = root_id
209
225
# Find out which parents we don't already know root ids for
210
parents = set(parent_map.values())
211
parents.difference_update(revision_root)
212
parents.discard(NULL_REVISION)
227
for revision_parents in parent_map.itervalues():
228
parents.update(revision_parents)
229
parents.difference_update(revision_root.keys() + [NULL_REVISION])
213
230
# Limit to revisions present in the versionedfile
214
parents = graph.get_parent_map(parents)
231
parents = graph.get_parent_map(parents).keys()
215
232
for tree in self.iter_rev_trees(parents):
216
root_id = tree.path2id('')
233
root_id = tree.get_root_id()
217
234
revision_root[tree.get_revision_id()] = root_id
218
235
return revision_root
227
244
rev_order = tsort.topo_sort(parent_map)
228
245
rev_id_to_root_id = self._find_root_ids(revs, parent_map, graph)
229
246
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
231
248
# Guaranteed stable, this groups all the file id operations together
232
249
# retaining topological order within the revisions of a file id.
233
250
# File id splits and joins would invalidate this, but they don't exist
234
251
# yet, and are unlikely to in non-rich-root environments anyway.
235
252
root_id_order.sort(key=operator.itemgetter(0))
236
253
# Create a record stream containing the roots to create.
237
if len(revs) > self.known_graph_threshold:
238
graph = self.source.get_known_graph_ancestry(revs)
254
from bzrlib.graph import FrozenHeadsCache
255
graph = FrozenHeadsCache(graph)
239
256
new_roots_stream = _new_root_data_stream(
240
257
root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
241
258
return [('texts', new_roots_stream)]
244
261
def _new_root_data_stream(
245
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
262
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
246
263
"""Generate a texts substream of synthesised root entries.
248
265
Used in fetches that do rich-root upgrades.
250
267
:param root_keys_to_create: iterable of (root_id, rev_id) pairs describing
251
268
the root entries to create.
252
269
:param rev_id_to_root_id_map: dict of known rev_id -> root_id mappings for
260
277
root_id, rev_id = root_key
261
278
parent_keys = _parent_keys_for_root_version(
262
279
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph)
263
yield versionedfile.ChunkedContentFactory(
264
root_key, parent_keys, None, [])
280
yield versionedfile.FulltextContentFactory(
281
root_key, parent_keys, None, '')
267
284
def _parent_keys_for_root_version(
268
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
285
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
269
286
"""Get the parent keys for a given root id.
271
288
A helper function for _new_root_data_stream.
273
290
# Include direct parents of the revision, but only if they used the same
326
341
selected_ids.append(parent_id)
327
342
parent_keys = [(root_id, parent_id) for parent_id in selected_ids]
328
343
return parent_keys
331
class TargetRepoKinds(object):
332
"""An enum-like set of constants.
334
They are the possible values of FetchSpecFactory.target_repo_kinds.
337
PREEXISTING = 'preexisting'
342
class FetchSpecFactory(object):
343
"""A helper for building the best fetch spec for a sprout call.
345
Factors that go into determining the sort of fetch to perform:
346
* did the caller specify any revision IDs?
347
* did the caller specify a source branch (need to fetch its
348
heads_to_fetch(), usually the tip + tags)
349
* is there an existing target repo (don't need to refetch revs it
351
* target is stacked? (similar to pre-existing target repo: even if
352
the target itself is new don't want to refetch existing revs)
354
:ivar source_branch: the source branch if one specified, else None.
355
:ivar source_branch_stop_revision_id: fetch up to this revision of
356
source_branch, rather than its tip.
357
:ivar source_repo: the source repository if one found, else None.
358
:ivar target_repo: the target repository acquired by sprout.
359
:ivar target_repo_kind: one of the TargetRepoKinds constants.
363
self._explicit_rev_ids = set()
364
self.source_branch = None
365
self.source_branch_stop_revision_id = None
366
self.source_repo = None
367
self.target_repo = None
368
self.target_repo_kind = None
371
def add_revision_ids(self, revision_ids):
372
"""Add revision_ids to the set of revision_ids to be fetched."""
373
self._explicit_rev_ids.update(revision_ids)
375
def make_fetch_spec(self):
376
"""Build a SearchResult or PendingAncestryResult or etc."""
377
if self.target_repo_kind is None or self.source_repo is None:
378
raise AssertionError(
379
'Incomplete FetchSpecFactory: %r' % (self.__dict__,))
380
if len(self._explicit_rev_ids) == 0 and self.source_branch is None:
381
if self.limit is not None:
382
raise NotImplementedError(
383
"limit is only supported with a source branch set")
384
# Caller hasn't specified any revisions or source branch
385
if self.target_repo_kind == TargetRepoKinds.EMPTY:
386
return vf_search.EverythingResult(self.source_repo)
388
# We want everything not already in the target (or target's
390
return vf_search.EverythingNotInOther(
391
self.target_repo, self.source_repo).execute()
392
heads_to_fetch = set(self._explicit_rev_ids)
393
if self.source_branch is not None:
394
must_fetch, if_present_fetch = self.source_branch.heads_to_fetch()
395
if self.source_branch_stop_revision_id is not None:
396
# Replace the tip rev from must_fetch with the stop revision
397
# XXX: this might be wrong if the tip rev is also in the
398
# must_fetch set for other reasons (e.g. it's the tip of
399
# multiple loom threads?), but then it's pretty unclear what it
400
# should mean to specify a stop_revision in that case anyway.
401
must_fetch.discard(self.source_branch.last_revision())
402
must_fetch.add(self.source_branch_stop_revision_id)
403
heads_to_fetch.update(must_fetch)
405
if_present_fetch = set()
406
if self.target_repo_kind == TargetRepoKinds.EMPTY:
407
# PendingAncestryResult does not raise errors if a requested head
408
# is absent. Ideally it would support the
409
# required_ids/if_present_ids distinction, but in practice
410
# heads_to_fetch will almost certainly be present so this doesn't
412
all_heads = heads_to_fetch.union(if_present_fetch)
413
ret = vf_search.PendingAncestryResult(all_heads, self.source_repo)
414
if self.limit is not None:
415
graph = self.source_repo.get_graph()
416
topo_order = list(graph.iter_topo_order(ret.get_keys()))
417
result_set = topo_order[:self.limit]
418
ret = self.source_repo.revision_ids_to_search_result(
422
return vf_search.NotInOtherForRevs(self.target_repo, self.source_repo,
423
required_ids=heads_to_fetch, if_present_ids=if_present_fetch,
424
limit=self.limit).execute()