13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
23
from bzrlib.plugins.git import git
24
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
25
from bzrlib.plugins.git.remote import RemoteGitRepository
27
from dulwich.objects import Commit
29
from cStringIO import StringIO
32
class BzrFetchGraphWalker(object):
34
def __init__(self, repository, mapping):
35
self.repository = repository
36
self.mapping = mapping
38
self.heads = set(repository.all_revision_ids())
42
revid = self.mapping.revision_id_foreign_to_bzr(sha)
45
def remove(self, revid):
48
self.heads.remove(revid)
49
if revid in self.parents:
50
for p in self.parents[revid]:
55
ret = self.heads.pop()
56
ps = self.repository.get_parent_map([ret])[ret]
57
self.parents[ret] = ps
58
self.heads.update([p for p in ps if not p in self.done])
61
return self.mapping.revision_id_bzr_to_foreign(ret)
62
except InvalidRevisionId:
67
def import_git_blob(repo, mapping, path, blob):
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Fetching from git into bzr."""
19
from __future__ import absolute_import
21
from dulwich.objects import (
29
from dulwich.object_store import (
41
from ..errors import (
44
from ..bzr.inventory import (
50
from ..revision import (
53
from ..bzr.inventorytree import InventoryRevisionTree
54
from ..sixish import text_type
55
from ..testament import (
61
from ..bzr.versionedfile import (
62
ChunkedContentFactory,
65
from .mapping import (
71
from .object_store import (
77
def import_git_blob(texts, mapping, path, name, hexshas,
78
base_bzr_tree, parent_id, revision_id,
79
parent_bzr_trees, lookup_object, modes, store_updater,
68
81
"""Import a git blob object into a bzr repository.
70
:param repo: bzr repository
83
:param texts: VersionedFiles to add to
71
84
:param path: Path in the tree
72
85
:param blob: A git blob
74
file_id = mapping.generate_file_id(path)
75
repo.texts.add_lines((file_id, blob.id),
77
osutils.split_lines(blob.data))
78
inv.add_path(path, "file", file_id)
81
def import_git_tree(repo, mapping, path, tree, inv, lookup_object):
86
:return: Inventory delta for this file
88
if not isinstance(path, bytes):
90
decoded_path = path.decode('utf-8')
91
(base_mode, mode) = modes
92
(base_hexsha, hexsha) = hexshas
93
if mapping.is_special_file(path):
95
if base_hexsha == hexsha and base_mode == mode:
96
# If nothing has changed since the base revision, we're done
98
file_id = lookup_file_id(decoded_path)
99
if stat.S_ISLNK(mode):
103
ie = cls(file_id, name.decode("utf-8"), parent_id)
104
if ie.kind == "file":
105
ie.executable = mode_is_executable(mode)
106
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
107
base_exec = base_bzr_tree.is_executable(decoded_path)
108
if ie.kind == "symlink":
109
ie.symlink_target = base_bzr_tree.get_symlink_target(decoded_path)
111
ie.text_size = base_bzr_tree.get_file_size(decoded_path)
112
ie.text_sha1 = base_bzr_tree.get_file_sha1(decoded_path)
113
if ie.kind == "symlink" or ie.executable == base_exec:
114
ie.revision = base_bzr_tree.get_file_revision(decoded_path)
116
blob = lookup_object(hexsha)
118
blob = lookup_object(hexsha)
119
if ie.kind == "symlink":
121
ie.symlink_target = blob.data.decode("utf-8")
123
ie.text_size = sum(map(len, blob.chunked))
124
ie.text_sha1 = osutils.sha_strings(blob.chunked)
125
# Check what revision we should store
127
for ptree in parent_bzr_trees:
129
ppath = ptree.id2path(file_id)
130
except errors.NoSuchId:
132
pkind = ptree.kind(ppath, file_id)
134
and ((pkind == "symlink" and
135
ptree.get_symlink_target(ppath, file_id) ==
136
ie.symlink_target) or
138
ptree.get_file_sha1(ppath, file_id) == ie.text_sha1 and
139
ptree.is_executable(ppath, file_id) == ie.executable))):
140
# found a revision in one of the parents to use
141
ie.revision = ptree.get_file_revision(ppath, file_id)
143
parent_key = (file_id, ptree.get_file_revision(ppath, file_id))
144
if parent_key not in parent_keys:
145
parent_keys.append(parent_key)
146
if ie.revision is None:
147
# Need to store a new revision
148
ie.revision = revision_id
149
if ie.revision is None:
150
raise ValueError("no file revision set")
151
if ie.kind == 'symlink':
154
chunks = blob.chunked
155
texts.insert_record_stream([
156
ChunkedContentFactory((file_id, ie.revision),
157
tuple(parent_keys), ie.text_sha1, chunks)])
159
if base_hexsha is not None:
160
old_path = decoded_path # Renames are not supported yet
161
if stat.S_ISDIR(base_mode):
162
invdelta.extend(remove_disappeared_children(
163
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
167
invdelta.append((old_path, decoded_path, file_id, ie))
168
if base_hexsha != hexsha:
169
store_updater.add_object(blob, (ie.file_id, ie.revision), path)
173
class SubmodulesRequireSubtrees(BzrError):
    """Error for git submodules that the target format cannot represent.

    The message template below is the only thing this class defines; all
    other behaviour comes from the BzrError base class.
    """

    _fmt = (
        "The repository you are fetching from contains submodules, which "
        "require a Bazaar format that supports tree references."
    )
179
def import_git_submodule(texts, mapping, path, name, hexshas,
180
base_bzr_tree, parent_id, revision_id,
181
parent_bzr_trees, lookup_object,
182
modes, store_updater, lookup_file_id):
183
"""Import a git submodule."""
184
(base_hexsha, hexsha) = hexshas
185
(base_mode, mode) = modes
186
if base_hexsha == hexsha and base_mode == mode:
188
file_id = lookup_file_id(path)
190
ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
191
ie.revision = revision_id
192
if base_hexsha is not None:
193
old_path = path.decode("utf-8") # Renames are not supported yet
194
if stat.S_ISDIR(base_mode):
195
invdelta.extend(remove_disappeared_children(
196
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
200
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
201
texts.insert_record_stream([
202
ChunkedContentFactory((file_id, ie.revision), (), None, [])])
203
invdelta.append((old_path, path, file_id, ie))
207
def remove_disappeared_children(base_bzr_tree, path, base_tree,
208
existing_children, lookup_object):
209
"""Generate an inventory delta for removed children.
211
:param base_bzr_tree: Base bzr tree against which to generate the
213
:param path: Path to process (unicode)
214
:param base_tree: Git Tree base object
215
:param existing_children: Children that still exist
216
:param lookup_object: Lookup a git object by its SHA1
217
:return: Inventory delta, as list
219
if not isinstance(path, text_type):
220
raise TypeError(path)
222
for name, mode, hexsha in base_tree.iteritems():
223
if name in existing_children:
225
c_path = posixpath.join(path, name.decode("utf-8"))
226
file_id = base_bzr_tree.path2id(c_path)
228
raise TypeError(file_id)
229
ret.append((c_path, None, file_id, None))
230
if stat.S_ISDIR(mode):
231
ret.extend(remove_disappeared_children(
232
base_bzr_tree, c_path, lookup_object(hexsha), [],
237
def import_git_tree(texts, mapping, path, name, hexshas,
238
base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
239
lookup_object, modes, store_updater,
240
lookup_file_id, allow_submodules=False):
82
241
"""Import a git tree object into a bzr repository.
84
:param repo: A Bzr repository object
85
:param path: Path in the tree
243
:param texts: VersionedFiles object to add to
244
:param path: Path in the tree (str)
245
:param name: Name of the tree (str)
86
246
:param tree: A git tree object
87
:param inv: Inventory object
247
:param base_bzr_tree: Base inventory against which to return inventory
249
:return: Inventory delta for this subtree
89
file_id = mapping.generate_file_id(path)
90
repo.texts.add_lines((file_id, tree.id),
93
inv.add_path(path, "directory", file_id)
94
for mode, name, hexsha in tree.entries():
95
entry_kind = (mode & 0700000) / 0100000
96
basename = name.decode("utf-8")
100
child_path = urlutils.join(path, name)
102
import_git_tree(repo, mapping, child_path, lookup_object, inv)
103
elif entry_kind == 1:
104
import_git_blob(repo, mapping, child_path, lookup_object, inv)
106
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
109
def import_git_objects(repo, mapping, object_iter):
251
(base_hexsha, hexsha) = hexshas
252
(base_mode, mode) = modes
253
if not isinstance(path, bytes):
254
raise TypeError(path)
255
if not isinstance(name, bytes):
256
raise TypeError(name)
257
if base_hexsha == hexsha and base_mode == mode:
258
# If nothing has changed since the base revision, we're done
261
file_id = lookup_file_id(osutils.safe_unicode(path))
262
# We just have to hope this is indeed utf-8:
263
ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
264
tree = lookup_object(hexsha)
265
if base_hexsha is None:
267
old_path = None # Newly appeared here
269
base_tree = lookup_object(base_hexsha)
270
old_path = path.decode("utf-8") # Renames aren't supported yet
271
new_path = path.decode("utf-8")
272
if base_tree is None or type(base_tree) is not Tree:
273
ie.revision = revision_id
274
invdelta.append((old_path, new_path, ie.file_id, ie))
275
texts.insert_record_stream([
276
ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
277
# Remember for next time
278
existing_children = set()
280
for name, child_mode, child_hexsha in tree.iteritems():
281
existing_children.add(name)
282
child_path = posixpath.join(path, name)
283
if type(base_tree) is Tree:
285
child_base_mode, child_base_hexsha = base_tree[name]
287
child_base_hexsha = None
290
child_base_hexsha = None
292
if stat.S_ISDIR(child_mode):
293
subinvdelta, grandchildmodes = import_git_tree(
294
texts, mapping, child_path, name,
295
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
296
revision_id, parent_bzr_trees, lookup_object,
297
(child_base_mode, child_mode), store_updater, lookup_file_id,
298
allow_submodules=allow_submodules)
299
elif S_ISGITLINK(child_mode): # submodule
300
if not allow_submodules:
301
raise SubmodulesRequireSubtrees()
302
subinvdelta, grandchildmodes = import_git_submodule(
303
texts, mapping, child_path, name,
304
(child_base_hexsha, child_hexsha),
305
base_bzr_tree, file_id, revision_id, parent_bzr_trees,
306
lookup_object, (child_base_mode, child_mode), store_updater,
309
if not mapping.is_special_file(name):
310
subinvdelta = import_git_blob(
311
texts, mapping, child_path, name,
312
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
313
revision_id, parent_bzr_trees, lookup_object,
314
(child_base_mode, child_mode), store_updater,
319
child_modes.update(grandchildmodes)
320
invdelta.extend(subinvdelta)
321
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
322
stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
324
child_modes[child_path] = child_mode
325
# Remove any children that have disappeared
326
if base_tree is not None and type(base_tree) is Tree:
327
invdelta.extend(remove_disappeared_children(
328
base_bzr_tree, old_path, base_tree, existing_children,
330
store_updater.add_object(tree, (file_id, revision_id), path)
331
return invdelta, child_modes
334
def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
335
o, rev, ret_tree, parent_trees, mapping,
336
unusual_modes, verifiers):
337
new_unusual_modes = mapping.export_unusual_file_modes(rev)
338
if new_unusual_modes != unusual_modes:
339
raise AssertionError("unusual modes don't match: %r != %r" % (
340
unusual_modes, new_unusual_modes))
341
# Verify that we can reconstruct the commit properly
342
rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
345
raise AssertionError("Reconstructed commit differs: %r != %r" % (
349
for path, obj, ie in _tree_to_objects(
350
ret_tree, parent_trees, target_git_object_retriever._cache.idmap,
351
unusual_modes, mapping.BZR_DUMMY_FILE):
352
old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
354
if obj.id != old_obj_id:
355
diff.append((path, lookup_object(old_obj_id), obj))
356
for (path, old_obj, new_obj) in diff:
357
while (old_obj.type_name == "tree"
358
and new_obj.type_name == "tree"
359
and sorted(old_obj) == sorted(new_obj)):
361
if old_obj[name][0] != new_obj[name][0]:
362
raise AssertionError(
363
"Modes for %s differ: %o != %o" %
364
(path, old_obj[name][0], new_obj[name][0]))
365
if old_obj[name][1] != new_obj[name][1]:
366
# Found a differing child, delve deeper
367
path = posixpath.join(path, name)
368
old_obj = lookup_object(old_obj[name][1])
369
new_obj = new_objs[path]
371
raise AssertionError(
372
"objects differ for %s: %r != %r" % (path, old_obj, new_obj))
375
def ensure_inventories_in_repo(repo, trees):
376
real_inv_vf = repo.inventories.without_fallbacks()
378
revid = t.get_revision_id()
379
if not real_inv_vf.get_parent_map([(revid, )]):
380
repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())
383
def import_git_commit(repo, mapping, head, lookup_object,
384
target_git_object_retriever, trees_cache):
385
o = lookup_object(head)
386
# Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
387
# were bzr roundtripped revisions they would be specified in the
389
rev, roundtrip_revid, verifiers = mapping.import_commit(
390
o, mapping.revision_id_foreign_to_bzr)
391
if roundtrip_revid is not None:
392
original_revid = rev.revision_id
393
rev.revision_id = roundtrip_revid
394
# We have to do this here, since we have to walk the tree and
395
# we need to make sure to import the blobs / trees with the right
396
# path; this may involve adding them more than once.
397
parent_trees = trees_cache.revision_trees(rev.parent_ids)
398
ensure_inventories_in_repo(repo, parent_trees)
399
if parent_trees == []:
400
base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
404
base_bzr_tree = parent_trees[0]
405
base_tree = lookup_object(o.parents[0]).tree
406
base_mode = stat.S_IFDIR
407
store_updater = target_git_object_retriever._get_updater(rev)
408
tree_supplement = mapping.get_fileid_map(lookup_object, o.tree)
409
inv_delta, unusual_modes = import_git_tree(
410
repo.texts, mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
411
None, rev.revision_id, parent_trees, lookup_object,
412
(base_mode, stat.S_IFDIR), store_updater,
413
tree_supplement.lookup_file_id,
414
allow_submodules=repo._format.supports_tree_reference)
415
if unusual_modes != {}:
416
for path, mode in unusual_modes.iteritems():
417
warn_unusual_mode(rev.foreign_revid, path, mode)
418
mapping.import_unusual_file_modes(rev, unusual_modes)
420
basis_id = rev.parent_ids[0]
422
basis_id = NULL_REVISION
423
base_bzr_inventory = None
425
base_bzr_inventory = base_bzr_tree.root_inventory
426
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
427
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
429
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
431
if verifiers and roundtrip_revid is not None:
432
testament = StrictTestament3(rev, ret_tree)
433
calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
434
if calculated_verifiers != verifiers:
435
trace.mutter("Testament SHA1 %r for %r did not match %r.",
436
calculated_verifiers["testament3-sha1"],
437
rev.revision_id, verifiers["testament3-sha1"])
438
rev.revision_id = original_revid
439
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
440
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
442
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
444
calculated_verifiers = {}
445
store_updater.add_object(o, calculated_verifiers, None)
446
store_updater.finish()
447
trees_cache.add(ret_tree)
448
repo.add_revision(rev.revision_id, rev)
449
if "verify" in debug.debug_flags:
450
verify_commit_reconstruction(
451
target_git_object_retriever, lookup_object, o, rev, ret_tree,
452
parent_trees, mapping, unusual_modes, verifiers)
455
def import_git_objects(repo, mapping, object_iter,
456
target_git_object_retriever, heads, pb=None,
110
458
"""Import a set of git objects into a bzr repository.
112
:param repo: Bazaar repository
460
:param repo: Target Bazaar repository
113
461
:param mapping: Mapping to use
114
462
:param object_iter: Iterator over Git objects.
463
:return: Tuple with pack hints and last imported revision id
116
# TODO: a more (memory-)efficient implementation of this
118
for o in object_iter:
465
def lookup_object(sha):
467
return object_iter[sha]
469
return target_git_object_retriever[sha]
472
heads = list(set(heads))
473
trees_cache = LRUTreeCache(repo)
121
474
# Find and convert commit objects
122
for o in objects.iterkeys():
477
pb.update("finding revisions to fetch", len(graph), None)
481
if not isinstance(head, bytes):
482
raise TypeError(head)
484
o = lookup_object(head)
123
487
if isinstance(o, Commit):
124
rev = mapping.import_commit(o)
125
root_trees[rev] = objects[o.tree_sha]
488
rev, roundtrip_revid, verifiers = mapping.import_commit(
489
o, mapping.revision_id_foreign_to_bzr)
490
if (repo.has_revision(rev.revision_id)
491
or (roundtrip_revid and
492
repo.has_revision(roundtrip_revid))):
494
graph.append((o.id, o.parents))
495
heads.extend([p for p in o.parents if p not in checked])
496
elif isinstance(o, Tag):
497
if o.object[1] not in checked:
498
heads.append(o.object[1])
500
trace.warning("Unable to import head object %r" % o)
503
# Order the revisions
126
504
# Create the inventory objects
127
for rev, root_tree in root_trees.iteritems():
128
# We have to do this here, since we have to walk the tree and
129
# we need to make sure to import the blobs / trees with the right
130
# path; this may involve adding them more than once.
132
def lookup_object(sha):
135
return reconstruct_git_object(repo, mapping, sha)
136
import_git_tree(repo, mapping, "", tree, inv, lookup_object)
137
repo.add_revision(rev.revision_id, rev, inv)
140
def reconstruct_git_commit(repo, rev):
    """Placeholder for reconstructing a git commit; not implemented.

    :param repo: Repository argument (currently unused)
    :param rev: Revision argument (currently unused)
    :raises NotImplementedError: always
    """
    # This is a plain function, so there is no ``self`` in scope. Referring
    # to the function by its own name keeps the intended NotImplementedError
    # from being masked by a NameError.
    raise NotImplementedError(reconstruct_git_commit)
144
def reconstruct_git_object(repo, mapping, sha):
146
revid = mapping.revision_id_foreign_to_bzr(sha)
148
rev = repo.get_revision(revid)
149
except NoSuchRevision:
152
return reconstruct_git_commit(rev)
156
raise KeyError("No such object %s" % sha)
159
class InterGitRepository(InterRepository):
161
_matching_repo_format = GitFormat()
164
def _get_repo_format_to_test():
167
def copy_content(self, revision_id=None, pb=None):
    """Copy content by delegating to fetch() with ghost finding disabled.

    See InterRepository.copy_content.

    :param revision_id: Passed through to fetch() unchanged
    :param pb: Passed through to fetch() unchanged
    """
    self.fetch(revision_id=revision_id, pb=pb, find_ghosts=False)
171
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
174
mapping = self.source.get_mapping()
177
pb.note("git: %s" % text)
179
info("git: %s" % text)
180
def determine_wants(heads):
181
if revision_id is None:
184
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
185
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
186
graph_walker = BzrFetchGraphWalker(self.target, mapping)
187
self.target.lock_write()
506
revision_ids = topo_sort(graph)
508
if limit is not None:
509
revision_ids = revision_ids[:limit]
511
for offset in range(0, len(revision_ids), batch_size):
512
target_git_object_retriever.start_write_group()
189
import_git_objects(self.target, mapping,
190
self.source.fetch_objects(determine_wants, graph_walker,
196
def is_compatible(source, target):
    """Report whether this handler can serve the given repository pair.

    :param source: Candidate source repository
    :param target: Candidate target repository
    :return: False when source is not a LocalGitRepository; otherwise
        whatever target.supports_rich_root() returns
    """
    # FIXME: Also check target uses VersionedFile
    if not isinstance(source, LocalGitRepository):
        return False
    return target.supports_rich_root()
514
repo.start_write_group()
516
for i, head in enumerate(
517
revision_ids[offset:offset + batch_size]):
519
pb.update("fetching revisions", offset + i,
521
import_git_commit(repo, mapping, head, lookup_object,
522
target_git_object_retriever, trees_cache)
524
except BaseException:
525
repo.abort_write_group()
528
hint = repo.commit_write_group()
530
pack_hints.extend(hint)
531
except BaseException:
532
target_git_object_retriever.abort_write_group()
535
target_git_object_retriever.commit_write_group()
536
return pack_hints, last_imported
539
class DetermineWantsRecorder(object):
541
def __init__(self, actual):
544
self.remote_refs = {}
546
def __call__(self, refs):
547
if type(refs) is not dict:
548
raise TypeError(refs)
549
self.remote_refs = refs
550
self.wants = self.actual(refs)