14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
23
from bzrlib.plugins.git import git
24
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
25
from bzrlib.plugins.git.remote import RemoteGitRepository
27
from dulwich.objects import Commit
29
from cStringIO import StringIO
32
class BzrFetchGraphWalker(object):
34
def __init__(self, repository, mapping):
35
self.repository = repository
36
self.mapping = mapping
38
self.heads = set(repository.all_revision_ids())
42
revid = self.mapping.revision_id_foreign_to_bzr(sha)
45
def remove(self, revid):
48
self.heads.remove(revid)
49
if revid in self.parents:
50
for p in self.parents[revid]:
55
ret = self.heads.pop()
56
ps = self.repository.get_parent_map([ret])[ret]
57
self.parents[ret] = ps
58
self.heads.update([p for p in ps if not p in self.done])
61
return self.mapping.revision_id_bzr_to_foreign(ret)
62
except InvalidRevisionId:
67
def import_git_blob(repo, mapping, path, blob):
17
from cStringIO import (
21
from dulwich.objects import (
26
from dulwich.object_store import (
39
from bzrlib.errors import (
44
from bzrlib.inventory import (
50
from bzrlib.lru_cache import (
53
from bzrlib.repository import (
56
from bzrlib.revision import (
59
from bzrlib.tsort import (
62
from bzrlib.versionedfile import (
63
FulltextContentFactory,
66
from bzrlib.plugins.git.mapping import (
68
inventory_to_tree_and_blobs,
74
from bzrlib.plugins.git.object_store import (
77
from bzrlib.plugins.git.remote import (
80
from bzrlib.plugins.git.repository import (
87
def import_git_blob(texts, mapping, path, hexsha, base_inv, parent_id,
88
revision_id, parent_invs, shagitmap, lookup_object, executable, symlink):
68
89
"""Import a git blob object into a bzr repository.
70
:param repo: bzr repository
91
:param texts: VersionedFiles to add to
71
92
:param path: Path in the tree
72
93
:param blob: A git blob
94
:return: Inventory delta for this file
74
96
file_id = mapping.generate_file_id(path)
75
repo.texts.add_lines((file_id, blob.id),
77
osutils.split_lines(blob.data))
78
inv.add_path(path, "file", file_id)
81
def import_git_tree(repo, mapping, path, tree, inv, lookup_object):
101
# We just have to hope this is indeed utf-8:
102
ie = cls(file_id, urlutils.basename(path).decode("utf-8"), parent_id)
103
ie.executable = executable
104
# See if this has changed at all
106
base_ie = base_inv[file_id]
112
base_sha = shagitmap.lookup_blob(file_id, base_ie.revision)
116
if (base_sha == hexsha and base_ie.executable == ie.executable
117
and base_ie.kind == ie.kind):
118
# If nothing has changed since the base revision, we're done
120
if base_sha == hexsha and base_ie.kind == ie.kind:
121
ie.text_size = base_ie.text_size
122
ie.text_sha1 = base_ie.text_sha1
123
ie.symlink_target = base_ie.symlink_target
124
if ie.executable == base_ie.executable:
125
ie.revision = base_ie.revision
127
blob = lookup_object(hexsha)
129
blob = lookup_object(hexsha)
130
if ie.kind == "symlink":
132
ie.symlink_target = blob.data
136
ie.text_size = len(blob.data)
137
ie.text_sha1 = osutils.sha_string(blob.data)
138
# Check what revision we should store
140
for pinv in parent_invs:
141
if pinv.revision_id == base_inv.revision_id:
150
if pie.text_sha1 == ie.text_sha1 and pie.executable == ie.executable and pie.symlink_target == ie.symlink_target:
151
# found a revision in one of the parents to use
152
ie.revision = pie.revision
154
parent_keys.append((file_id, pie.revision))
155
if ie.revision is None:
156
# Need to store a new revision
157
ie.revision = revision_id
158
assert file_id is not None
159
assert ie.revision is not None
160
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, blob.data)])
161
shamap = [(hexsha, "blob", (ie.file_id, ie.revision))]
164
if file_id in base_inv:
165
old_path = base_inv.id2path(file_id)
168
invdelta = [(old_path, path, file_id, ie)]
169
invdelta.extend(remove_disappeared_children(base_inv, base_ie, []))
170
return (invdelta, shamap)
173
def import_git_submodule(texts, mapping, path, hexsha, base_inv, parent_id,
    revision_id, parent_invs, shagitmap, lookup_object):
    """Import a git submodule (gitlink) entry into a bzr repository.

    Submodule import is not implemented; this function always raises.
    The signature is the same as import_git_tree's so that the tree
    walker can dispatch to either with identical arguments.

    :param texts: VersionedFiles object to add to
    :param mapping: Mapping to use
    :param path: Path in the tree
    :param hexsha: Hex SHA of the object the gitlink entry points at
    :param base_inv: Base inventory against which to return inventory delta
    :param parent_id: File id of the parent directory
    :param revision_id: Revision id being imported
    :param parent_invs: Inventories of the parent revisions
    :param shagitmap: Map used to look up git SHAs for bzr objects
    :param lookup_object: Callable returning the git object for a SHA
    :raises NotImplementedError: always
    """
    # NOTE(review): the caller in import_git_tree unpacks the result as
    # (inventory delta, child modes, sha map); a real implementation would
    # need to return that shape.
    raise NotImplementedError(import_git_submodule)
178
def remove_disappeared_children(base_inv, base_ie, existing_children):
179
if base_ie is None or base_ie.kind != 'directory':
182
deletable = [v for k,v in base_ie.children.iteritems() if k not in existing_children]
185
ret.append((base_inv.id2path(ie.file_id), None, ie.file_id, None))
186
if ie.kind == "directory":
187
deletable.extend(ie.children.values())
191
def import_git_tree(texts, mapping, path, hexsha, base_inv, parent_id,
192
revision_id, parent_invs, shagitmap, lookup_object):
82
193
"""Import a git tree object into a bzr repository.
84
:param repo: A Bzr repository object
195
:param texts: VersionedFiles object to add to
85
196
:param path: Path in the tree
86
197
:param tree: A git tree object
87
:param inv: Inventory object
198
:param base_inv: Base inventory against which to return inventory delta
199
:return: Inventory delta for this subtree
89
202
file_id = mapping.generate_file_id(path)
90
repo.texts.add_lines((file_id, tree.id),
93
inv.add_path(path, "directory", file_id)
94
for mode, name, hexsha in tree.entries():
95
entry_kind = (mode & 0700000) / 0100000
203
# We just have to hope this is indeed utf-8:
204
ie = InventoryDirectory(file_id, urlutils.basename(path.decode("utf-8")),
207
base_ie = base_inv[file_id]
209
# Newly appeared here
211
ie.revision = revision_id
212
texts.add_lines((file_id, ie.revision), (), [])
213
invdelta.append((None, path, file_id, ie))
215
# See if this has changed at all
217
base_sha = shagitmap.lookup_tree(file_id, base_inv.revision_id)
221
if base_sha == hexsha:
222
# If nothing has changed since the base revision, we're done
224
if base_ie.kind != "directory":
225
ie.revision = revision_id
226
texts.add_lines((ie.file_id, ie.revision), (), [])
227
invdelta.append((base_inv.id2path(ie.file_id), path, ie.file_id, ie))
228
# Remember for next time
229
existing_children = set()
232
tree = lookup_object(hexsha)
233
for mode, name, child_hexsha in tree.entries():
96
234
basename = name.decode("utf-8")
100
child_path = urlutils.join(path, name)
102
import_git_tree(repo, mapping, child_path, lookup_object, inv)
103
elif entry_kind == 1:
104
import_git_blob(repo, mapping, child_path, lookup_object, inv)
106
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
109
def import_git_objects(repo, mapping, object_iter):
235
existing_children.add(basename)
236
child_path = osutils.pathjoin(path, name)
237
if stat.S_ISDIR(mode):
238
subinvdelta, grandchildmodes, subshamap = import_git_tree(
239
texts, mapping, child_path, child_hexsha, base_inv,
240
file_id, revision_id, parent_invs, shagitmap, lookup_object)
241
invdelta.extend(subinvdelta)
242
child_modes.update(grandchildmodes)
243
shamap.extend(subshamap)
244
elif S_ISGITLINK(mode): # submodule
245
subinvdelta, grandchildmodes, subshamap = import_git_submodule(
246
texts, mapping, child_path, child_hexsha, base_inv,
247
file_id, revision_id, parent_invs, shagitmap, lookup_object)
248
invdelta.extend(subinvdelta)
249
child_modes.update(grandchildmodes)
250
shamap.extend(subshamap)
252
subinvdelta, subshamap = import_git_blob(texts, mapping,
253
child_path, child_hexsha, base_inv, file_id, revision_id,
254
parent_invs, shagitmap, lookup_object,
255
mode_is_executable(mode), stat.S_ISLNK(mode))
256
invdelta.extend(subinvdelta)
257
shamap.extend(subshamap)
258
if mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
259
stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
260
child_modes[child_path] = mode
261
# Remove any children that have disappeared
262
invdelta.extend(remove_disappeared_children(base_inv, base_ie, existing_children))
263
shamap.append((hexsha, "tree", (file_id, revision_id)))
264
return invdelta, child_modes, shamap
267
def import_git_objects(repo, mapping, object_iter, target_git_object_retriever,
110
269
"""Import a set of git objects into a bzr repository.
112
:param repo: Bazaar repository
271
:param repo: Target Bazaar repository
113
272
:param mapping: Mapping to use
114
273
:param object_iter: Iterator over Git objects.
275
def lookup_object(sha):
277
return object_iter[sha]
279
return target_git_object_retriever[sha]
116
280
# TODO: a more (memory-)efficient implementation of this
118
for o in object_iter:
286
parent_invs_cache = LRUCache(50)
121
287
# Find and convert commit objects
122
for o in objects.iterkeys():
290
pb.update("finding revisions to fetch", len(graph), None)
292
assert isinstance(head, str)
294
o = lookup_object(head)
123
297
if isinstance(o, Commit):
124
298
rev = mapping.import_commit(o)
125
root_trees[rev] = objects[o.tree_sha]
299
if repo.has_revision(rev.revision_id):
301
squash_revision(repo, rev)
302
root_trees[rev.revision_id] = o.tree
303
revisions[rev.revision_id] = rev
304
graph.append((rev.revision_id, rev.parent_ids))
305
target_git_object_retriever._idmap.add_entry(o.id, "commit",
306
(rev.revision_id, o.tree))
307
heads.extend([p for p in o.parents if p not in checked])
308
elif isinstance(o, Tag):
309
heads.append(o.object[1])
311
trace.warning("Unable to import head object %r" % o)
313
# Order the revisions
126
314
# Create the inventory objects
127
for rev, root_tree in root_trees.iteritems():
315
for i, revid in enumerate(topo_sort(graph)):
317
pb.update("fetching revisions", i, len(graph))
318
rev = revisions[revid]
128
319
# We have to do this here, since we have to walk the tree and
129
# we need to make sure to import the blobs / trees with the right
320
# we need to make sure to import the blobs / trees with the right
130
321
# path; this may involve adding them more than once.
132
def lookup_object(sha):
135
return reconstruct_git_object(repo, mapping, sha)
136
import_git_tree(repo, mapping, "", tree, inv, lookup_object)
137
repo.add_revision(rev.revision_id, rev, inv)
140
def reconstruct_git_commit(repo, rev):
    """Reconstruct a git commit object from a bzr revision.

    Not implemented yet; this function always raises.

    :param repo: Bazaar repository holding the revision
    :param rev: Bazaar revision to convert
    :raises NotImplementedError: always
    """
    # This is a module-level function, so there is no ``self``; referring to
    # ``self.reconstruct_git_commit`` raised NameError instead of the
    # intended NotImplementedError.
    raise NotImplementedError(reconstruct_git_commit)
144
def reconstruct_git_object(repo, mapping, sha):
146
revid = mapping.revision_id_foreign_to_bzr(sha)
148
rev = repo.get_revision(revid)
149
except NoSuchRevision:
152
return reconstruct_git_commit(rev)
156
raise KeyError("No such object %s" % sha)
323
for parent_id in rev.parent_ids:
325
parent_invs.append(parent_invs_cache[parent_id])
327
parent_inv = repo.get_inventory(parent_id)
328
parent_invs.append(parent_inv)
329
parent_invs_cache[parent_id] = parent_inv
330
if parent_invs == []:
331
base_inv = Inventory(root_id=None)
333
base_inv = parent_invs[0]
334
inv_delta, unusual_modes, shamap = import_git_tree(repo.texts,
335
mapping, "", root_trees[revid], base_inv, None, revid,
336
parent_invs, target_git_object_retriever._idmap, lookup_object)
337
target_git_object_retriever._idmap.add_entries(shamap)
338
if unusual_modes != {}:
339
for path, mode in unusual_modes.iteritems():
340
warn_unusual_mode(rev.foreign_revid, path, mode)
341
mapping.import_unusual_file_modes(rev, unusual_modes)
343
basis_id = rev.parent_ids[0]
345
basis_id = NULL_REVISION
346
rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
347
inv_delta, rev.revision_id, rev.parent_ids)
348
parent_invs_cache[rev.revision_id] = inv
349
repo.add_revision(rev.revision_id, rev)
350
if "verify" in debug.debug_flags:
351
new_unusual_modes = mapping.export_unusual_file_modes(rev)
352
if new_unusual_modes != unusual_modes:
353
raise AssertionError("unusual modes don't match: %r != %r" % (unusual_modes, new_unusual_modes))
354
objs = inventory_to_tree_and_blobs(inv, repo.texts, mapping, unusual_modes)
355
for sha1, newobj, path in objs:
356
assert path is not None
357
oldobj = tree_lookup_path(lookup_object, root_trees[revid], path)
359
raise AssertionError("%r != %r in %s" % (oldobj, newobj, path))
361
target_git_object_retriever._idmap.commit()
159
364
class InterGitRepository(InterRepository):
161
_matching_repo_format = GitFormat()
366
_matching_repo_format = GitRepositoryFormat()
164
369
def _get_repo_format_to_test():
168
373
"""See InterRepository.copy_content."""
169
374
self.fetch(revision_id, pb, find_ghosts=False)
171
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
376
def fetch(self, revision_id=None, pb=None, find_ghosts=False, mapping=None,
378
self.fetch_refs(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts,
379
mapping=mapping, fetch_spec=fetch_spec)
382
class InterGitNonGitRepository(InterGitRepository):
383
"""Base InterRepository that copies revisions from a Git into a non-Git
386
def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
387
mapping=None, fetch_spec=None):
173
388
if mapping is None:
174
389
mapping = self.source.get_mapping()
177
pb.note("git: %s" % text)
179
info("git: %s" % text)
180
def determine_wants(heads):
181
if revision_id is None:
184
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
390
if revision_id is not None:
391
interesting_heads = [revision_id]
392
elif fetch_spec is not None:
393
interesting_heads = fetch_spec.heads
395
interesting_heads = None
397
def determine_wants(refs):
399
if interesting_heads is None:
400
ret = [sha for (ref, sha) in refs.iteritems() if not ref.endswith("^{}")]
402
ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
185
403
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
186
graph_walker = BzrFetchGraphWalker(self.target, mapping)
404
self.fetch_objects(determine_wants, mapping, pb)
408
_GIT_PROGRESS_RE = re.compile(r"(.*?): +(\d+)% \((\d+)/(\d+)\)")
409
def report_git_progress(pb, text):
410
text = text.rstrip("\r\n")
411
g = _GIT_PROGRESS_RE.match(text)
413
(text, pct, current, total) = g.groups()
414
pb.update(text, int(current), int(total))
416
pb.update(text, 0, 0)
419
class InterRemoteGitNonGitRepository(InterGitNonGitRepository):
420
"""InterRepository that copies revisions from a remote Git into a non-Git
423
def fetch_objects(self, determine_wants, mapping, pb=None):
425
report_git_progress(pb, text)
426
store = BazaarObjectStore(self.target, mapping)
187
427
self.target.lock_write()
189
import_git_objects(self.target, mapping,
190
self.source.fetch_objects(determine_wants, graph_walker,
429
# FIXME: This should be more efficient
430
heads = self.target.get_graph().heads(self.target.all_revision_ids())
431
graph_walker = store.get_graph_walker(
432
[store._lookup_revision_sha1(head) for head in heads])
435
def record_determine_wants(heads):
436
wants = determine_wants(heads)
437
recorded_wants.extend(wants)
442
create_pb = pb = ui.ui_factory.nested_progress_bar()
444
self.target.start_write_group()
446
objects_iter = self.source.fetch_objects(
447
record_determine_wants, graph_walker,
448
store.get_raw, progress)
449
import_git_objects(self.target, mapping, objects_iter,
450
store, recorded_wants, pb)
452
self.target.commit_write_group()
193
457
self.target.unlock()