13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
23
from bzrlib.plugins.git import git
24
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
25
from bzrlib.plugins.git.remote import RemoteGitRepository
27
from dulwich.objects import Commit
29
from cStringIO import StringIO
32
class BzrFetchGraphWalker(object):
34
def __init__(self, repository, mapping):
35
self.repository = repository
36
self.mapping = mapping
38
self.heads = set(repository.all_revision_ids())
42
revid = self.mapping.revision_id_foreign_to_bzr(sha)
45
def remove(self, revid):
48
self.heads.remove(revid)
49
if revid in self.parents:
50
for p in self.parents[revid]:
55
ret = self.heads.pop()
56
ps = self.repository.get_parent_map([ret])[ret]
57
self.parents[ret] = ps
58
self.heads.update([p for p in ps if not p in self.done])
61
return self.mapping.revision_id_bzr_to_foreign(ret)
62
except InvalidRevisionId:
67
def import_git_blob(repo, mapping, path, blob):
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Fetching from git into bzr."""
19
from __future__ import absolute_import
21
from dulwich.objects import (
29
from dulwich.object_store import (
41
from ..errors import (
44
from ..bzr.inventory import (
50
from ..revision import (
53
from ..bzr.inventorytree import InventoryRevisionTree
54
from ..sixish import text_type
55
from ..bzr.testament import (
61
from ..bzr.versionedfile import (
62
ChunkedContentFactory,
65
from .mapping import (
71
from .object_store import (
77
def import_git_blob(texts, mapping, path, name, hexshas,
78
base_bzr_tree, parent_id, revision_id,
79
parent_bzr_trees, lookup_object, modes, store_updater,
68
81
"""Import a git blob object into a bzr repository.
70
:param repo: bzr repository
83
:param texts: VersionedFiles to add to
71
84
:param path: Path in the tree
72
85
:param blob: A git blob
74
file_id = mapping.generate_file_id(path)
75
repo.texts.add_lines((file_id, blob.id),
77
osutils.split_lines(blob.data))
78
inv.add_path(path, "file", file_id)
81
def import_git_tree(repo, mapping, path, tree, inv, lookup_object):
86
:return: Inventory delta for this file
88
if not isinstance(path, bytes):
90
decoded_path = path.decode('utf-8')
91
(base_mode, mode) = modes
92
(base_hexsha, hexsha) = hexshas
93
if mapping.is_special_file(path):
95
if base_hexsha == hexsha and base_mode == mode:
96
# If nothing has changed since the base revision, we're done
98
file_id = lookup_file_id(decoded_path)
99
if stat.S_ISLNK(mode):
103
ie = cls(file_id, name.decode("utf-8"), parent_id)
104
if ie.kind == "file":
105
ie.executable = mode_is_executable(mode)
106
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
107
base_exec = base_bzr_tree.is_executable(decoded_path)
108
if ie.kind == "symlink":
109
ie.symlink_target = base_bzr_tree.get_symlink_target(decoded_path)
111
ie.text_size = base_bzr_tree.get_file_size(decoded_path)
112
ie.text_sha1 = base_bzr_tree.get_file_sha1(decoded_path)
113
if ie.kind == "symlink" or ie.executable == base_exec:
114
ie.revision = base_bzr_tree.get_file_revision(decoded_path)
116
blob = lookup_object(hexsha)
118
blob = lookup_object(hexsha)
119
if ie.kind == "symlink":
121
ie.symlink_target = blob.data.decode("utf-8")
123
ie.text_size = sum(map(len, blob.chunked))
124
ie.text_sha1 = osutils.sha_strings(blob.chunked)
125
# Check what revision we should store
127
for ptree in parent_bzr_trees:
129
ppath = ptree.id2path(file_id)
130
except errors.NoSuchId:
132
pkind = ptree.kind(ppath)
133
if (pkind == ie.kind and
134
((pkind == "symlink" and ptree.get_symlink_target(ppath) == ie.symlink_target) or
135
(pkind == "file" and ptree.get_file_sha1(ppath) == ie.text_sha1 and
136
ptree.is_executable(ppath) == ie.executable))):
137
# found a revision in one of the parents to use
138
ie.revision = ptree.get_file_revision(ppath)
140
parent_key = (file_id, ptree.get_file_revision(ppath))
141
if parent_key not in parent_keys:
142
parent_keys.append(parent_key)
143
if ie.revision is None:
144
# Need to store a new revision
145
ie.revision = revision_id
146
if ie.revision is None:
147
raise ValueError("no file revision set")
148
if ie.kind == 'symlink':
151
chunks = blob.chunked
152
texts.insert_record_stream([
153
ChunkedContentFactory((file_id, ie.revision),
154
tuple(parent_keys), ie.text_sha1, chunks)])
156
if base_hexsha is not None:
157
old_path = decoded_path # Renames are not supported yet
158
if stat.S_ISDIR(base_mode):
159
invdelta.extend(remove_disappeared_children(
160
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
164
invdelta.append((old_path, decoded_path, file_id, ie))
165
if base_hexsha != hexsha:
166
store_updater.add_object(blob, (ie.file_id, ie.revision), path)
170
class SubmodulesRequireSubtrees(BzrError):
171
_fmt = ("The repository you are fetching from contains submodules, "
172
"which require a Bazaar format that supports tree references.")
176
def import_git_submodule(texts, mapping, path, name, hexshas,
177
base_bzr_tree, parent_id, revision_id,
178
parent_bzr_trees, lookup_object,
179
modes, store_updater, lookup_file_id):
180
"""Import a git submodule."""
181
(base_hexsha, hexsha) = hexshas
182
(base_mode, mode) = modes
183
if base_hexsha == hexsha and base_mode == mode:
185
file_id = lookup_file_id(path)
187
ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
188
ie.revision = revision_id
189
if base_hexsha is not None:
190
old_path = path.decode("utf-8") # Renames are not supported yet
191
if stat.S_ISDIR(base_mode):
192
invdelta.extend(remove_disappeared_children(
193
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
197
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
198
texts.insert_record_stream([
199
ChunkedContentFactory((file_id, ie.revision), (), None, [])])
200
invdelta.append((old_path, path, file_id, ie))
204
def remove_disappeared_children(base_bzr_tree, path, base_tree,
205
existing_children, lookup_object):
206
"""Generate an inventory delta for removed children.
208
:param base_bzr_tree: Base bzr tree against which to generate the
210
:param path: Path to process (unicode)
211
:param base_tree: Git Tree base object
212
:param existing_children: Children that still exist
213
:param lookup_object: Lookup a git object by its SHA1
214
:return: Inventory delta, as list
216
if not isinstance(path, text_type):
217
raise TypeError(path)
219
for name, mode, hexsha in base_tree.iteritems():
220
if name in existing_children:
222
c_path = posixpath.join(path, name.decode("utf-8"))
223
file_id = base_bzr_tree.path2id(c_path)
225
raise TypeError(file_id)
226
ret.append((c_path, None, file_id, None))
227
if stat.S_ISDIR(mode):
228
ret.extend(remove_disappeared_children(
229
base_bzr_tree, c_path, lookup_object(hexsha), [],
234
def import_git_tree(texts, mapping, path, name, hexshas,
235
base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
236
lookup_object, modes, store_updater,
237
lookup_file_id, allow_submodules=False):
82
238
"""Import a git tree object into a bzr repository.
84
:param repo: A Bzr repository object
85
:param path: Path in the tree
240
:param texts: VersionedFiles object to add to
241
:param path: Path in the tree (str)
242
:param name: Name of the tree (str)
86
243
:param tree: A git tree object
87
:param inv: Inventory object
244
:param base_bzr_tree: Base inventory against which to return inventory
246
:return: Inventory delta for this subtree
89
file_id = mapping.generate_file_id(path)
90
repo.texts.add_lines((file_id, tree.id),
93
inv.add_path(path, "directory", file_id)
94
for mode, name, hexsha in tree.entries():
95
entry_kind = (mode & 0700000) / 0100000
96
basename = name.decode("utf-8")
100
child_path = urlutils.join(path, name)
102
import_git_tree(repo, mapping, child_path, lookup_object, inv)
103
elif entry_kind == 1:
104
import_git_blob(repo, mapping, child_path, lookup_object, inv)
106
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
109
def import_git_objects(repo, mapping, object_iter):
248
(base_hexsha, hexsha) = hexshas
249
(base_mode, mode) = modes
250
if not isinstance(path, bytes):
251
raise TypeError(path)
252
if not isinstance(name, bytes):
253
raise TypeError(name)
254
if base_hexsha == hexsha and base_mode == mode:
255
# If nothing has changed since the base revision, we're done
258
file_id = lookup_file_id(osutils.safe_unicode(path))
259
# We just have to hope this is indeed utf-8:
260
ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
261
tree = lookup_object(hexsha)
262
if base_hexsha is None:
264
old_path = None # Newly appeared here
266
base_tree = lookup_object(base_hexsha)
267
old_path = path.decode("utf-8") # Renames aren't supported yet
268
new_path = path.decode("utf-8")
269
if base_tree is None or type(base_tree) is not Tree:
270
ie.revision = revision_id
271
invdelta.append((old_path, new_path, ie.file_id, ie))
272
texts.insert_record_stream([
273
ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
274
# Remember for next time
275
existing_children = set()
277
for name, child_mode, child_hexsha in tree.iteritems():
278
existing_children.add(name)
279
child_path = posixpath.join(path, name)
280
if type(base_tree) is Tree:
282
child_base_mode, child_base_hexsha = base_tree[name]
284
child_base_hexsha = None
287
child_base_hexsha = None
289
if stat.S_ISDIR(child_mode):
290
subinvdelta, grandchildmodes = import_git_tree(
291
texts, mapping, child_path, name,
292
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
293
revision_id, parent_bzr_trees, lookup_object,
294
(child_base_mode, child_mode), store_updater, lookup_file_id,
295
allow_submodules=allow_submodules)
296
elif S_ISGITLINK(child_mode): # submodule
297
if not allow_submodules:
298
raise SubmodulesRequireSubtrees()
299
subinvdelta, grandchildmodes = import_git_submodule(
300
texts, mapping, child_path, name,
301
(child_base_hexsha, child_hexsha),
302
base_bzr_tree, file_id, revision_id, parent_bzr_trees,
303
lookup_object, (child_base_mode, child_mode), store_updater,
306
if not mapping.is_special_file(name):
307
subinvdelta = import_git_blob(
308
texts, mapping, child_path, name,
309
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
310
revision_id, parent_bzr_trees, lookup_object,
311
(child_base_mode, child_mode), store_updater,
316
child_modes.update(grandchildmodes)
317
invdelta.extend(subinvdelta)
318
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
319
stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
321
child_modes[child_path] = child_mode
322
# Remove any children that have disappeared
323
if base_tree is not None and type(base_tree) is Tree:
324
invdelta.extend(remove_disappeared_children(
325
base_bzr_tree, old_path, base_tree, existing_children,
327
store_updater.add_object(tree, (file_id, revision_id), path)
328
return invdelta, child_modes
331
def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
332
o, rev, ret_tree, parent_trees, mapping,
333
unusual_modes, verifiers):
334
new_unusual_modes = mapping.export_unusual_file_modes(rev)
335
if new_unusual_modes != unusual_modes:
336
raise AssertionError("unusual modes don't match: %r != %r" % (
337
unusual_modes, new_unusual_modes))
338
# Verify that we can reconstruct the commit properly
339
rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
342
raise AssertionError("Reconstructed commit differs: %r != %r" % (
346
for path, obj, ie in _tree_to_objects(
347
ret_tree, parent_trees, target_git_object_retriever._cache.idmap,
348
unusual_modes, mapping.BZR_DUMMY_FILE):
349
old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
351
if obj.id != old_obj_id:
352
diff.append((path, lookup_object(old_obj_id), obj))
353
for (path, old_obj, new_obj) in diff:
354
while (old_obj.type_name == "tree"
355
and new_obj.type_name == "tree"
356
and sorted(old_obj) == sorted(new_obj)):
358
if old_obj[name][0] != new_obj[name][0]:
359
raise AssertionError(
360
"Modes for %s differ: %o != %o" %
361
(path, old_obj[name][0], new_obj[name][0]))
362
if old_obj[name][1] != new_obj[name][1]:
363
# Found a differing child, delve deeper
364
path = posixpath.join(path, name)
365
old_obj = lookup_object(old_obj[name][1])
366
new_obj = new_objs[path]
368
raise AssertionError(
369
"objects differ for %s: %r != %r" % (path, old_obj, new_obj))
372
def ensure_inventories_in_repo(repo, trees):
373
real_inv_vf = repo.inventories.without_fallbacks()
375
revid = t.get_revision_id()
376
if not real_inv_vf.get_parent_map([(revid, )]):
377
repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())
380
def import_git_commit(repo, mapping, head, lookup_object,
381
target_git_object_retriever, trees_cache):
382
o = lookup_object(head)
383
# Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
384
# were bzr roundtripped revisions they would be specified in the
386
rev, roundtrip_revid, verifiers = mapping.import_commit(
387
o, mapping.revision_id_foreign_to_bzr)
388
if roundtrip_revid is not None:
389
original_revid = rev.revision_id
390
rev.revision_id = roundtrip_revid
391
# We have to do this here, since we have to walk the tree and
392
# we need to make sure to import the blobs / trees with the right
393
# path; this may involve adding them more than once.
394
parent_trees = trees_cache.revision_trees(rev.parent_ids)
395
ensure_inventories_in_repo(repo, parent_trees)
396
if parent_trees == []:
397
base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
401
base_bzr_tree = parent_trees[0]
402
base_tree = lookup_object(o.parents[0]).tree
403
base_mode = stat.S_IFDIR
404
store_updater = target_git_object_retriever._get_updater(rev)
405
tree_supplement = mapping.get_fileid_map(lookup_object, o.tree)
406
inv_delta, unusual_modes = import_git_tree(
407
repo.texts, mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
408
None, rev.revision_id, parent_trees, lookup_object,
409
(base_mode, stat.S_IFDIR), store_updater,
410
tree_supplement.lookup_file_id,
411
allow_submodules=repo._format.supports_tree_reference)
412
if unusual_modes != {}:
413
for path, mode in unusual_modes.iteritems():
414
warn_unusual_mode(rev.foreign_revid, path, mode)
415
mapping.import_unusual_file_modes(rev, unusual_modes)
417
basis_id = rev.parent_ids[0]
419
basis_id = NULL_REVISION
420
base_bzr_inventory = None
422
base_bzr_inventory = base_bzr_tree.root_inventory
423
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
424
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
426
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
428
if verifiers and roundtrip_revid is not None:
429
testament = StrictTestament3(rev, ret_tree)
430
calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
431
if calculated_verifiers != verifiers:
432
trace.mutter("Testament SHA1 %r for %r did not match %r.",
433
calculated_verifiers["testament3-sha1"],
434
rev.revision_id, verifiers["testament3-sha1"])
435
rev.revision_id = original_revid
436
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
437
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
439
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
441
calculated_verifiers = {}
442
store_updater.add_object(o, calculated_verifiers, None)
443
store_updater.finish()
444
trees_cache.add(ret_tree)
445
repo.add_revision(rev.revision_id, rev)
446
if "verify" in debug.debug_flags:
447
verify_commit_reconstruction(
448
target_git_object_retriever, lookup_object, o, rev, ret_tree,
449
parent_trees, mapping, unusual_modes, verifiers)
452
def import_git_objects(repo, mapping, object_iter,
453
target_git_object_retriever, heads, pb=None,
110
455
"""Import a set of git objects into a bzr repository.
112
:param repo: Bazaar repository
457
:param repo: Target Bazaar repository
113
458
:param mapping: Mapping to use
114
459
:param object_iter: Iterator over Git objects.
460
:return: Tuple with pack hints and last imported revision id
116
# TODO: a more (memory-)efficient implementation of this
118
for o in object_iter:
462
def lookup_object(sha):
464
return object_iter[sha]
466
return target_git_object_retriever[sha]
469
heads = list(set(heads))
470
trees_cache = LRUTreeCache(repo)
121
471
# Find and convert commit objects
122
for o in objects.iterkeys():
474
pb.update("finding revisions to fetch", len(graph), None)
478
if not isinstance(head, bytes):
479
raise TypeError(head)
481
o = lookup_object(head)
123
484
if isinstance(o, Commit):
124
rev = mapping.import_commit(o)
125
root_trees[rev] = objects[o.tree_sha]
485
rev, roundtrip_revid, verifiers = mapping.import_commit(
486
o, mapping.revision_id_foreign_to_bzr)
487
if (repo.has_revision(rev.revision_id)
488
or (roundtrip_revid and
489
repo.has_revision(roundtrip_revid))):
491
graph.append((o.id, o.parents))
492
heads.extend([p for p in o.parents if p not in checked])
493
elif isinstance(o, Tag):
494
if o.object[1] not in checked:
495
heads.append(o.object[1])
497
trace.warning("Unable to import head object %r" % o)
500
# Order the revisions
126
501
# Create the inventory objects
127
for rev, root_tree in root_trees.iteritems():
128
# We have to do this here, since we have to walk the tree and
129
# we need to make sure to import the blobs / trees with the riht
130
# path; this may involve adding them more than once.
132
def lookup_object(sha):
135
return reconstruct_git_object(repo, mapping, sha)
136
import_git_tree(repo, mapping, "", tree, inv, lookup_object)
137
repo.add_revision(rev.revision_id, rev, inv)
140
def reconstruct_git_commit(repo, rev):
141
raise NotImplementedError(self.reconstruct_git_commit)
144
def reconstruct_git_object(repo, mapping, sha):
146
revid = mapping.revision_id_foreign_to_bzr(sha)
148
rev = repo.get_revision(revid)
149
except NoSuchRevision:
152
return reconstruct_git_commit(rev)
156
raise KeyError("No such object %s" % sha)
159
class InterGitRepository(InterRepository):
161
_matching_repo_format = GitFormat()
164
def _get_repo_format_to_test():
167
def copy_content(self, revision_id=None, pb=None):
168
"""See InterRepository.copy_content."""
169
self.fetch(revision_id, pb, find_ghosts=False)
171
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
174
mapping = self.source.get_mapping()
177
pb.note("git: %s" % text)
179
info("git: %s" % text)
180
def determine_wants(heads):
181
if revision_id is None:
184
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
185
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
186
graph_walker = BzrFetchGraphWalker(self.target, mapping)
187
self.target.lock_write()
503
revision_ids = topo_sort(graph)
505
if limit is not None:
506
revision_ids = revision_ids[:limit]
508
for offset in range(0, len(revision_ids), batch_size):
509
target_git_object_retriever.start_write_group()
189
import_git_objects(self.target, mapping,
190
self.source.fetch_objects(determine_wants, graph_walker,
196
def is_compatible(source, target):
197
"""Be compatible with GitRepository."""
198
# FIXME: Also check target uses VersionedFile
199
return (isinstance(source, LocalGitRepository) and
200
target.supports_rich_root())
511
repo.start_write_group()
513
for i, head in enumerate(
514
revision_ids[offset:offset + batch_size]):
516
pb.update("fetching revisions", offset + i,
518
import_git_commit(repo, mapping, head, lookup_object,
519
target_git_object_retriever, trees_cache)
521
except BaseException:
522
repo.abort_write_group()
525
hint = repo.commit_write_group()
527
pack_hints.extend(hint)
528
except BaseException:
529
target_git_object_retriever.abort_write_group()
532
target_git_object_retriever.commit_write_group()
533
return pack_hints, last_imported
536
class DetermineWantsRecorder(object):
538
def __init__(self, actual):
541
self.remote_refs = {}
543
def __call__(self, refs):
544
if type(refs) is not dict:
545
raise TypeError(refs)
546
self.remote_refs = refs
547
self.wants = self.actual(refs)