21
31
that has merged into it. As the first step of a merge, pull, or
22
32
branch operation we copy history from the source into the destination
35
The copying is done in a slightly complicated order. We don't want to
36
add a revision to the store until everything it refers to is also
37
stored, so that if a revision is present we can totally recreate it.
38
However, we can't know what files are included in a revision until we
39
read its inventory. Therefore, we first pull the XML and hold it in
40
memory until we've updated all of the files referenced.
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
40
from bzrlib.revision import NULL_REVISION
41
from bzrlib.trace import mutter
44
class RepoFetcher(object):
45
"""Pull revisions and texts from one repository to another.
47
This should not be used directly; it's essentially an object to encapsulate
48
the logic in InterRepository.fetch().
51
def __init__(self, to_repository, from_repository, last_revision=None,
52
find_ghosts=True, fetch_spec=None):
53
"""Create a repo fetcher.
55
:param last_revision: If set, try to limit to the data this revision
57
:param find_ghosts: If True search the entire history for ghosts.
59
# repository.fetch has the responsibility for short-circuiting
60
# attempts to copy between a repository and itself.
61
self.to_repository = to_repository
62
self.from_repository = from_repository
63
self.sink = to_repository._get_sink()
64
# must not mutate self._last_revision as it's potentially a shared instance
65
self._last_revision = last_revision
66
self._fetch_spec = fetch_spec
67
self.find_ghosts = find_ghosts
68
self.from_repository.lock_read()
69
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
70
self.from_repository, self.from_repository._format,
71
self.to_repository, self.to_repository._format)
75
self.from_repository.unlock()
78
"""Primary worker function.
80
This initialises all the needed variables, and then fetches the
81
requested revisions, finally clearing the progress bar.
83
# Roughly this is what we're aiming for fetch to become:
85
# missing = self.sink.insert_stream(self.source.get_stream(search))
87
# missing = self.sink.insert_stream(self.source.get_items(missing))
90
self.file_ids_names = {}
91
pb = ui.ui_factory.nested_progress_bar()
92
pb.show_pct = pb.show_count = False
94
pb.update("Finding revisions", 0, 2)
95
search = self._revids_to_fetch()
98
pb.update("Fetching revisions", 1, 2)
99
self._fetch_everything_for_search(search)
103
def _fetch_everything_for_search(self, search):
104
"""Fetch all data for the given set of revisions."""
105
# The first phase is "file". We pass the progress bar for it directly
106
# into item_keys_introduced_by, which has more information about how
107
# that phase is progressing than we do. Progress updates for the other
108
# phases are taken care of in this function.
109
# XXX: there should be a clear owner of the progress reporting. Perhaps
110
# item_keys_introduced_by should have a richer API than it does at the
111
# moment, so that it can feed the progress information back to this
113
if (self.from_repository._format.rich_root_data and
114
not self.to_repository._format.rich_root_data):
115
raise errors.IncompatibleRepositories(
116
self.from_repository, self.to_repository,
117
"different rich-root support")
118
pb = ui.ui_factory.nested_progress_bar()
120
pb.update("Get stream source")
121
source = self.from_repository._get_source(
122
self.to_repository._format)
123
stream = source.get_stream(search)
124
from_format = self.from_repository._format
125
pb.update("Inserting stream")
126
resume_tokens, missing_keys = self.sink.insert_stream(
127
stream, from_format, [])
128
if self.to_repository._fallback_repositories:
130
self._parent_inventories(search.get_keys()))
132
pb.update("Missing keys")
133
stream = source.get_stream_for_missing_keys(missing_keys)
134
pb.update("Inserting missing keys")
135
resume_tokens, missing_keys = self.sink.insert_stream(
136
stream, from_format, resume_tokens)
138
raise AssertionError(
139
"second push failed to complete a fetch %r." % (
142
raise AssertionError(
143
"second push failed to commit the fetch %r." % (
145
pb.update("Finishing stream")
150
def _revids_to_fetch(self):
151
"""Determines the exact revisions needed from self.from_repository to
152
install self._last_revision in self.to_repository.
154
If no revisions need to be fetched, then this just returns None.
156
if self._fetch_spec is not None:
157
return self._fetch_spec
158
mutter('fetch up to rev {%s}', self._last_revision)
159
if self._last_revision is NULL_REVISION:
160
# explicit limit of no revisions needed
162
return self.to_repository.search_missing_revision_ids(
163
self.from_repository, self._last_revision,
164
find_ghosts=self.find_ghosts)
166
def _parent_inventories(self, revision_ids):
    """Return inventory keys for parents lying outside ``revision_ids``.

    Finds all the parent revisions referenced by the given revisions but
    not among them, so that their inventories can be sent as well.

    :param revision_ids: the revision ids whose parents are examined.
    :return: a set of ('inventories', revision_id) keys, one per parent
        that is neither NULL_REVISION nor in ``revision_ids``.
    """
    parent_maps = self.to_repository.get_parent_map(revision_ids)
    # NOTE(review): the initialisation of ``parents`` and the final return
    # were not visible in the reviewed excerpt; both are reconstructed from
    # how ``parents`` and the result are used -- confirm.
    parents = set()
    # A plain loop rather than map(): the call is made only for its side
    # effect, so there is no point building a list of None results.
    for parent_ids in parent_maps.itervalues():
        parents.update(parent_ids)
    parents.discard(NULL_REVISION)
    parents.difference_update(revision_ids)
    missing_keys = set(('inventories', rev_id) for rev_id in parents)
    return missing_keys
179
class Inter1and2Helper(object):
180
"""Helper for operations that convert data from model 1 and 2
182
This is for use by fetchers and converters.
185
def __init__(self, source):
188
:param source: The repository data comes from
192
def iter_rev_trees(self, revs):
193
"""Iterate through RevisionTrees efficiently.
195
Additionally, the inventory's revision_id is set if unset.
197
Trees are retrieved in batches of 100, and then yielded in the order
200
:param revs: A list of revision ids
202
# In case that revs is not a list.
205
for tree in self.source.revision_trees(revs[:100]):
206
if tree.inventory.revision_id is None:
207
tree.inventory.revision_id = tree.get_revision_id()
211
def _find_root_ids(self, revs, parent_map, graph):
213
for tree in self.iter_rev_trees(revs):
214
revision_id = tree.inventory.root.revision
215
root_id = tree.get_root_id()
216
revision_root[revision_id] = root_id
217
# Find out which parents we don't already know root ids for
219
for revision_parents in parent_map.itervalues():
220
parents.update(revision_parents)
221
parents.difference_update(revision_root.keys() + [NULL_REVISION])
222
# Limit to revisions present in the versionedfile
223
parents = graph.get_parent_map(parents).keys()
224
for tree in self.iter_rev_trees(parents):
225
root_id = tree.get_root_id()
226
revision_root[tree.get_revision_id()] = root_id
229
def generate_root_texts(self, revs):
230
"""Generate VersionedFiles for all root ids.
232
:param revs: the revisions to include
234
graph = self.source.get_graph()
235
parent_map = graph.get_parent_map(revs)
236
rev_order = tsort.topo_sort(parent_map)
237
rev_id_to_root_id = self._find_root_ids(revs, parent_map, graph)
238
root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in
240
# Guaranteed stable, this groups all the file id operations together
241
# retaining topological order within the revisions of a file id.
242
# File id splits and joins would invalidate this, but they don't exist
243
# yet, and are unlikely to in non-rich-root environments anyway.
244
root_id_order.sort(key=operator.itemgetter(0))
245
# Create a record stream containing the roots to create.
247
# XXX: not covered by tests, should have a flag to always run
248
# this. -- mbp 20100129
249
graph = self.source_repo.get_known_graph_ancestry(revs)
250
new_roots_stream = _new_root_data_stream(
251
root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
252
return [('texts', new_roots_stream)]
255
def _get_rich_root_heads_graph(source_repo, revision_ids):
256
"""Get a Graph object suitable for asking heads() for new rich roots."""
260
def _new_root_data_stream(
261
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
262
"""Generate a texts substream of synthesised root entries.
264
Used in fetches that do rich-root upgrades.
266
:param root_keys_to_create: iterable of (root_id, rev_id) pairs describing
267
the root entries to create.
268
:param rev_id_to_root_id_map: dict of known rev_id -> root_id mappings for
269
calculating the parents. If a parent rev_id is not found here then it
270
will be recalculated.
271
:param parent_map: a parent map for all the revisions in
273
:param graph: a graph to use instead of repo.get_graph().
275
for root_key in root_keys_to_create:
276
root_id, rev_id = root_key
277
parent_keys = _parent_keys_for_root_version(
278
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph)
279
yield versionedfile.FulltextContentFactory(
280
root_key, parent_keys, None, '')
283
def _parent_keys_for_root_version(
284
root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None):
285
"""Get the parent keys for a given root id.
287
A helper function for _new_root_data_stream.
289
# Include direct parents of the revision, but only if they used the same
290
# root_id and are heads.
291
rev_parents = parent_map[rev_id]
293
for parent_id in rev_parents:
294
if parent_id == NULL_REVISION:
296
if parent_id not in rev_id_to_root_id_map:
297
# We probably didn't read this revision, go spend the extra effort
300
tree = repo.revision_tree(parent_id)
301
except errors.NoSuchRevision:
302
# Ghost, fill out rev_id_to_root_id in case we encounter this
304
# But set parent_root_id to None since we don't really know
305
parent_root_id = None
307
parent_root_id = tree.get_root_id()
308
rev_id_to_root_id_map[parent_id] = None
310
# rev_id_to_root_id_map[parent_id] = parent_root_id
311
# memory consumption maybe?
313
parent_root_id = rev_id_to_root_id_map[parent_id]
314
if root_id == parent_root_id:
315
# With stacking we _might_ want to refer to a non-local revision,
316
# but this code path only applies when we have the full content
317
# available, so ghosts really are ghosts, not just the edge of
319
parent_ids.append(parent_id)
321
# root_id may be in the parent anyway.
323
tree = repo.revision_tree(parent_id)
324
except errors.NoSuchRevision:
325
# ghost, can't refer to it.
329
parent_ids.append(tree.inventory[root_id].revision)
330
except errors.NoSuchId:
333
# Drop non-head parents
335
graph = repo.get_graph()
336
heads = graph.heads(parent_ids)
338
for parent_id in parent_ids:
339
if parent_id in heads and parent_id not in selected_ids:
340
selected_ids.append(parent_id)
341
parent_keys = [(root_id, parent_id) for parent_id in selected_ids]
43
# TODO: Avoid repeatedly opening weaves so many times.
45
# XXX: This doesn't handle ghost (not present in branch) revisions at
48
# - get a list of revisions that need to be pulled in
49
# - for each one, pull in that revision file
50
# and get the inventory, and store the inventory with right
52
# - and get the ancestry, and store that with right parents too
53
# - and keep a note of all file ids and version seen
54
# - then go through all files; for each one get the weave,
55
# and add in all file versions
59
def greedy_fetch(to_branch, from_branch, revision, pb):
    """Copy history from ``from_branch`` into ``to_branch``.

    Constructs a Fetcher for the two branches and reports what it recorded.

    :param to_branch: destination branch, passed to Fetcher.
    :param from_branch: source branch, passed to Fetcher.
    :param revision: passed to Fetcher as its revision limit.
    :param pb: progress bar passed to Fetcher.
    :return: a (count_copied, failed_revisions) tuple read off the Fetcher.
    """
    fetcher = Fetcher(to_branch, from_branch, revision, pb)
    copied = fetcher.count_copied
    failures = fetcher.failed_revisions
    return copied, failures
64
class Fetcher(object):
65
"""Pull history from one branch to another.
68
If set, pull only up to this revision_id.
70
def __init__(self, to_branch, from_branch, revision_limit=None, pb=None):
71
self.to_branch = to_branch
72
self.from_branch = from_branch
73
self.revision_limit = revision_limit
74
self.failed_revisions = []
77
self.pb = bzrlib.ui.ui_factory.progress_bar()
80
self._load_histories()
81
revs_to_fetch = self._compare_ancestries()
82
self._copy_revisions(revs_to_fetch)
84
def _load_histories(self):
85
"""Load histories of both branches, up to the limit."""
86
self.from_history = self.from_branch.revision_history()
87
self.to_history = self.to_branch.revision_history()
88
if self.revision_limit:
89
assert isinstance(revision_limit, basestring)
91
rev_index = self.from_history.index(revision_limit)
94
if rev_index is not None:
95
self.from_history = self.from_history[:rev_index + 1]
97
self.from_history = [revision]
100
def _compare_ancestries(self):
101
"""Get a list of revisions that must be copied.
103
That is, every revision that's in the ancestry of the source
104
branch and not in the destination branch."""
105
if self.from_history:
106
self.from_ancestry = self.from_branch.get_ancestry(self.from_history[-1])
108
self.from_ancestry = []
110
self.to_history = self.to_branch.get_ancestry(self.to_history[-1])
113
ss = set(self.to_history)
115
for rev_id in self.from_ancestry:
117
to_fetch.append(rev_id)
118
mutter('need to get revision {%s}', rev_id)
119
mutter('need to get %d revisions in total', len(to_fetch))
124
def _copy_revisions(self, revs_to_fetch):
125
for rev_id in revs_to_fetch:
126
self._copy_one_revision(rev_id)
129
def _copy_one_revision(self, rev_id):
130
"""Copy revision and everything referenced by it."""
131
mutter('copying revision {%s}', rev_id)
132
rev_xml = self.from_branch.get_revision_xml(rev_id)
133
inv_xml = self.from_branch.get_inventory_xml(rev_id)
134
rev = serializer_v5.read_revision_from_string(rev_xml)
135
inv = serializer_v5.read_inventory_from_string(inv_xml)
136
assert rev.revision_id == rev_id
137
assert rev.inventory_sha1 == sha_string(inv_xml)
138
mutter(' commiter %s, %d parents',
141
self._copy_new_texts(rev_id, inv)
142
self.to_branch.weave_store.add_text(INVENTORY_FILEID, rev_id,
143
split_lines(inv_xml), rev.parents)
144
self.to_branch.revision_store.add(StringIO(rev_xml), rev_id)
147
def _copy_new_texts(self, rev_id, inv):
148
"""Copy any new texts occurring in this revision."""
149
# TODO: Rather than writing out weaves every time, hold them
150
# in memory until everything's done? But this way is nicer
151
# if it's interrupted.
152
for path, ie in inv.iter_entries():
153
if ie.kind != 'file':
155
if ie.text_version != rev_id:
157
mutter('%s {%s} is changed in this revision',
159
self._copy_one_text(rev_id, ie.file_id)
162
def _copy_one_text(self, rev_id, file_id):
163
"""Copy one file text."""
164
from_weave = self.from_branch.weave_store.get_weave(file_id)
165
from_idx = from_weave.lookup(rev_id)
166
from_parents = map(from_weave.idx_to_name, from_weave.parents(from_idx))
167
text_lines = from_weave.get(from_idx)
168
to_weave = self.to_branch.weave_store.get_weave_or_empty(file_id)
169
to_parents = map(to_weave.lookup, from_parents)
170
# it's ok to add even if the text is already there
171
to_weave.add(rev_id, to_parents, text_lines)
172
self.to_branch.weave_store.put_weave(file_id, to_weave)