# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Fetching from git into bzr."""

from __future__ import absolute_import

import posixpath
import stat

from dulwich.objects import (
    Commit,
    S_IFGITLINK,
    S_ISGITLINK,
    Tag,
    Tree,
    )
from dulwich.object_store import (
    tree_lookup_path,
    )

from .. import (
    debug,
    errors,
    osutils,
    trace,
    )
from ..errors import (
    BzrError,
    )
from ..bzr.inventory import (
    InventoryDirectory,
    InventoryFile,
    InventoryLink,
    TreeReference,
    )
from ..revision import (
    NULL_REVISION,
    )
from ..bzr.inventorytree import InventoryRevisionTree
from ..sixish import text_type
from ..bzr.testament import (
    StrictTestament3,
    )
from ..tree import InterTree
from ..tsort import topo_sort
from ..bzr.versionedfile import (
    ChunkedContentFactory,
    )

from .mapping import (
    DEFAULT_FILE_MODE,
    mode_is_executable,
    mode_kind,
    warn_unusual_mode,
    )
from .object_store import (
    LRUTreeCache,
    _tree_to_objects,
    )


def import_git_blob(texts, mapping, path, name, hexshas,
                    base_bzr_tree, parent_id, revision_id,
                    parent_bzr_trees, lookup_object, modes, store_updater,
                    lookup_file_id):
    """Import a git blob object into a bzr repository.

    :param texts: VersionedFiles to add to
    :param path: Path in the tree
    :param blob: A git blob
    :return: Inventory delta for this file
    """
    if not isinstance(path, bytes):
        raise TypeError(path)
    decoded_path = path.decode('utf-8')
    (base_mode, mode) = modes
    (base_hexsha, hexsha) = hexshas
    if mapping.is_special_file(path):
        return []
    if base_hexsha == hexsha and base_mode == mode:
        # If nothing has changed since the base revision, we're done
        return []
    file_id = lookup_file_id(decoded_path)
    if stat.S_ISLNK(mode):
        cls = InventoryLink
    else:
        cls = InventoryFile
    ie = cls(file_id, name.decode("utf-8"), parent_id)
    if ie.kind == "file":
        ie.executable = mode_is_executable(mode)
    if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
        base_exec = base_bzr_tree.is_executable(decoded_path)
        if ie.kind == "symlink":
            ie.symlink_target = base_bzr_tree.get_symlink_target(decoded_path)
        else:
            ie.text_size = base_bzr_tree.get_file_size(decoded_path)
            ie.text_sha1 = base_bzr_tree.get_file_sha1(decoded_path)
        if ie.kind == "symlink" or ie.executable == base_exec:
            ie.revision = base_bzr_tree.get_file_revision(decoded_path)
        else:
            blob = lookup_object(hexsha)
    else:
        blob = lookup_object(hexsha)
        if ie.kind == "symlink":
            ie.symlink_target = blob.data.decode("utf-8")
        else:
            ie.text_size = sum(map(len, blob.chunked))
            ie.text_sha1 = osutils.sha_strings(blob.chunked)
    # Check what revision we should store
    parent_keys = []
    for ptree in parent_bzr_trees:
        intertree = InterTree.get(ptree, base_bzr_tree)
        try:
            ppath = intertree.find_source_paths(decoded_path, recurse='none')
        except errors.NoSuchFile:
            ppath = None
        if ppath is None:
            continue
        pkind = ptree.kind(ppath)
        if (pkind == ie.kind and
            ((pkind == "symlink" and ptree.get_symlink_target(ppath) == ie.symlink_target) or
             (pkind == "file" and ptree.get_file_sha1(ppath) == ie.text_sha1 and
                 ptree.is_executable(ppath) == ie.executable))):
            # found a revision in one of the parents to use
            ie.revision = ptree.get_file_revision(ppath)
            break
        parent_key = (file_id, ptree.get_file_revision(ppath))
        if parent_key not in parent_keys:
            parent_keys.append(parent_key)
    if ie.revision is None:
        # Need to store a new revision
        ie.revision = revision_id
        if ie.revision is None:
            raise ValueError("no file revision set")
        if ie.kind == 'symlink':
            chunks = []
        else:
            chunks = blob.chunked
        texts.insert_record_stream([
            ChunkedContentFactory((file_id, ie.revision),
                                  tuple(parent_keys), ie.text_sha1, chunks)])
    invdelta = []
    if base_hexsha is not None:
        old_path = decoded_path  # Renames are not supported yet
        if stat.S_ISDIR(base_mode):
            invdelta.extend(remove_disappeared_children(
                base_bzr_tree, old_path, lookup_object(base_hexsha), [],
                lookup_object))
    else:
        old_path = None
    invdelta.append((old_path, decoded_path, file_id, ie))
    if base_hexsha != hexsha:
        store_updater.add_object(blob, (ie.file_id, ie.revision), path)
    return invdelta
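
# Note on the return value (illustrative, not used by the code above): the
# delta entries built here follow breezy's inventory-delta convention of
# (old_path, new_path, file_id, entry).  For example, a blob that is new in
# this commit produces roughly
#     (None, u"docs/readme.txt", file_id, <InventoryFile>)
# while remove_disappeared_children() below emits
#     (old_child_path, None, file_id, None)
# for children that vanished from a directory.  The path shown is a made-up
# example.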


class SubmodulesRequireSubtrees(BzrError):
    _fmt = ("The repository you are fetching from contains submodules, "
            "which require a Bazaar format that supports tree references.")


def import_git_submodule(texts, mapping, path, name, hexshas,
                         base_bzr_tree, parent_id, revision_id,
                         parent_bzr_trees, lookup_object,
                         modes, store_updater, lookup_file_id):
    """Import a git submodule."""
    (base_hexsha, hexsha) = hexshas
    (base_mode, mode) = modes
    if base_hexsha == hexsha and base_mode == mode:
        return [], {}
    path = path.decode('utf-8')
    file_id = lookup_file_id(path)
    invdelta = []
    ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
    ie.revision = revision_id
    if base_hexsha is not None:
        old_path = path  # Renames are not supported yet
        if stat.S_ISDIR(base_mode):
            invdelta.extend(remove_disappeared_children(
                base_bzr_tree, old_path, lookup_object(base_hexsha), [],
                lookup_object))
    else:
        old_path = None
    ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
    texts.insert_record_stream([
        ChunkedContentFactory((file_id, ie.revision), (), None, [])])
    invdelta.append((old_path, path, file_id, ie))
    return invdelta, {}
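
# Illustrative note: a submodule becomes a TreeReference whose
# reference_revision is the bzr revision id derived from the submodule's
# commit SHA via the mapping.  With the default bzr-git mapping this is
# roughly b"git-v1:" + hexsha; the exact prefix depends on the mapping in
# use, so treat this as an assumption rather than a guarantee.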


def remove_disappeared_children(base_bzr_tree, path, base_tree,
                                existing_children, lookup_object):
    """Generate an inventory delta for removed children.

    :param base_bzr_tree: Base bzr tree against which to generate the
        inventory delta.
    :param path: Path to process (unicode)
    :param base_tree: Git Tree base object
    :param existing_children: Children that still exist
    :param lookup_object: Lookup a git object by its SHA1
    :return: Inventory delta, as list
    """
    if not isinstance(path, text_type):
        raise TypeError(path)
    ret = []
    for name, mode, hexsha in base_tree.iteritems():
        if name in existing_children:
            continue
        c_path = posixpath.join(path, name.decode("utf-8"))
        file_id = base_bzr_tree.path2id(c_path)
        if file_id is None:
            raise TypeError(file_id)
        ret.append((c_path, None, file_id, None))
        if stat.S_ISDIR(mode):
            ret.extend(remove_disappeared_children(
                base_bzr_tree, c_path, lookup_object(hexsha), [],
                lookup_object))
    return ret


def import_git_tree(texts, mapping, path, name, hexshas,
                    base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
                    lookup_object, modes, store_updater,
                    lookup_file_id, allow_submodules=False):
    """Import a git tree object into a bzr repository.

    :param texts: VersionedFiles object to add to
    :param path: Path in the tree (str)
    :param name: Name of the tree (str)
    :param tree: A git tree object
    :param base_bzr_tree: Base inventory against which to return inventory
        delta
    :return: Inventory delta for this subtree
    """
    (base_hexsha, hexsha) = hexshas
    (base_mode, mode) = modes
    if not isinstance(path, bytes):
        raise TypeError(path)
    if not isinstance(name, bytes):
        raise TypeError(name)
    if base_hexsha == hexsha and base_mode == mode:
        # If nothing has changed since the base revision, we're done
        return [], {}
    invdelta = []
    file_id = lookup_file_id(osutils.safe_unicode(path))
    # We just have to hope this is indeed utf-8:
    ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
    tree = lookup_object(hexsha)
    if base_hexsha is None:
        base_tree = None
        old_path = None  # Newly appeared here
    else:
        base_tree = lookup_object(base_hexsha)
        old_path = path.decode("utf-8")  # Renames aren't supported yet
    new_path = path.decode("utf-8")
    if base_tree is None or type(base_tree) is not Tree:
        ie.revision = revision_id
        invdelta.append((old_path, new_path, ie.file_id, ie))
        texts.insert_record_stream([
            ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
    # Remember for next time
    existing_children = set()
    child_modes = {}
    for name, child_mode, child_hexsha in tree.iteritems():
        existing_children.add(name)
        child_path = posixpath.join(path, name)
        if type(base_tree) is Tree:
            try:
                child_base_mode, child_base_hexsha = base_tree[name]
            except KeyError:
                child_base_hexsha = None
                child_base_mode = 0
        else:
            child_base_hexsha = None
            child_base_mode = 0
        if stat.S_ISDIR(child_mode):
            subinvdelta, grandchildmodes = import_git_tree(
                texts, mapping, child_path, name,
                (child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
                revision_id, parent_bzr_trees, lookup_object,
                (child_base_mode, child_mode), store_updater, lookup_file_id,
                allow_submodules=allow_submodules)
        elif S_ISGITLINK(child_mode):  # submodule
            if not allow_submodules:
                raise SubmodulesRequireSubtrees()
            subinvdelta, grandchildmodes = import_git_submodule(
                texts, mapping, child_path, name,
                (child_base_hexsha, child_hexsha),
                base_bzr_tree, file_id, revision_id, parent_bzr_trees,
                lookup_object, (child_base_mode, child_mode), store_updater,
                lookup_file_id)
        else:
            if not mapping.is_special_file(name):
                subinvdelta = import_git_blob(
                    texts, mapping, child_path, name,
                    (child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
                    revision_id, parent_bzr_trees, lookup_object,
                    (child_base_mode, child_mode), store_updater,
                    lookup_file_id)
            else:
                subinvdelta = []
            grandchildmodes = {}
        child_modes.update(grandchildmodes)
        invdelta.extend(subinvdelta)
        if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
                              stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
                              S_IFGITLINK):
            child_modes[child_path] = child_mode
    # Remove any children that have disappeared
    if base_tree is not None and type(base_tree) is Tree:
        invdelta.extend(remove_disappeared_children(
            base_bzr_tree, old_path, base_tree, existing_children,
            lookup_object))
    store_updater.add_object(tree, (file_id, revision_id), path)
    return invdelta, child_modes
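
# Illustrative note: the child_modes mapping returned above only records
# modes outside the usual git values (0o40000 directories, 0o100644 and
# 0o100755 files, 0o120000 symlinks, gitlinks); for example a group-writable
# blob could show up as {u"bin/tool": 0o100664}.  The example path and mode
# are made up.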


def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
                                 o, rev, ret_tree, parent_trees, mapping,
                                 unusual_modes, verifiers):
    new_unusual_modes = mapping.export_unusual_file_modes(rev)
    if new_unusual_modes != unusual_modes:
        raise AssertionError("unusual modes don't match: %r != %r" % (
            unusual_modes, new_unusual_modes))
    # Verify that we can reconstruct the commit properly
    rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
                                                            verifiers)
    if rec_o != o:
        raise AssertionError("Reconstructed commit differs: %r != %r" % (
            rec_o, o))
    diff = []
    new_objs = {}
    for path, obj, ie in _tree_to_objects(
            ret_tree, parent_trees, target_git_object_retriever._cache.idmap,
            unusual_modes, mapping.BZR_DUMMY_FILE):
        old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
        new_objs[path] = obj
        if obj.id != old_obj_id:
            diff.append((path, lookup_object(old_obj_id), obj))
    for (path, old_obj, new_obj) in diff:
        while (old_obj.type_name == "tree"
               and new_obj.type_name == "tree"
               and sorted(old_obj) == sorted(new_obj)):
            for name in old_obj:
                if old_obj[name][0] != new_obj[name][0]:
                    raise AssertionError(
                        "Modes for %s differ: %o != %o" %
                        (path, old_obj[name][0], new_obj[name][0]))
                if old_obj[name][1] != new_obj[name][1]:
                    # Found a differing child, delve deeper
                    path = posixpath.join(path, name)
                    old_obj = lookup_object(old_obj[name][1])
                    new_obj = new_objs[path]
                    break
        raise AssertionError(
            "objects differ for %s: %r != %r" % (path, old_obj, new_obj))


def ensure_inventories_in_repo(repo, trees):
    real_inv_vf = repo.inventories.without_fallbacks()
    for t in trees:
        revid = t.get_revision_id()
        if not real_inv_vf.get_parent_map([(revid, )]):
            repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())


def import_git_commit(repo, mapping, head, lookup_object,
                      target_git_object_retriever, trees_cache, strict):
    o = lookup_object(head)
    # Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
    # were bzr roundtripped revisions they would be specified in the
    # roundtrip data.
    rev, roundtrip_revid, verifiers = mapping.import_commit(
        o, mapping.revision_id_foreign_to_bzr, strict)
    if roundtrip_revid is not None:
        original_revid = rev.revision_id
        rev.revision_id = roundtrip_revid
    # We have to do this here, since we have to walk the tree and
    # we need to make sure to import the blobs / trees with the right
    # path; this may involve adding them more than once.
    parent_trees = trees_cache.revision_trees(rev.parent_ids)
    ensure_inventories_in_repo(repo, parent_trees)
    if parent_trees == []:
        base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
        base_tree = None
        base_mode = None
    else:
        base_bzr_tree = parent_trees[0]
        base_tree = lookup_object(o.parents[0]).tree
        base_mode = stat.S_IFDIR
    store_updater = target_git_object_retriever._get_updater(rev)
    inv_delta, unusual_modes = import_git_tree(
        repo.texts, mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
        None, rev.revision_id, parent_trees, lookup_object,
        (base_mode, stat.S_IFDIR), store_updater,
        mapping.generate_file_id,
        allow_submodules=repo._format.supports_tree_reference)
    if unusual_modes != {}:
        for path, mode in unusual_modes.iteritems():
            warn_unusual_mode(rev.foreign_revid, path, mode)
        mapping.import_unusual_file_modes(rev, unusual_modes)
    try:
        basis_id = rev.parent_ids[0]
    except IndexError:
        basis_id = NULL_REVISION
        base_bzr_inventory = None
    else:
        base_bzr_inventory = base_bzr_tree.root_inventory
    rev.inventory_sha1, inv = repo.add_inventory_by_delta(
        basis_id, inv_delta, rev.revision_id, rev.parent_ids,
        base_bzr_inventory)
    ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
    if verifiers and roundtrip_revid is not None:
        testament = StrictTestament3(rev, ret_tree)
        calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
        if calculated_verifiers != verifiers:
            trace.mutter("Testament SHA1 %r for %r did not match %r.",
                         calculated_verifiers["testament3-sha1"],
                         rev.revision_id, verifiers["testament3-sha1"])
            rev.revision_id = original_revid
            rev.inventory_sha1, inv = repo.add_inventory_by_delta(
                basis_id, inv_delta, rev.revision_id, rev.parent_ids,
                base_bzr_inventory)
            ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
    else:
        calculated_verifiers = {}
    store_updater.add_object(o, calculated_verifiers, None)
    store_updater.finish()
    trees_cache.add(ret_tree)
    repo.add_revision(rev.revision_id, rev)
    if "verify" in debug.debug_flags:
        verify_commit_reconstruction(
            target_git_object_retriever, lookup_object, o, rev, ret_tree,
            parent_trees, mapping, unusual_modes, verifiers)
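
# Illustrative notes: the verifiers compared above are a small dict such as
# {"testament3-sha1": <hex digest>} computed from StrictTestament3, and the
# extra reconstruction pass at the end only runs when the "verify" debug
# flag is enabled (typically spelled -Dverify on the command line; that
# spelling is an assumption, not taken from this module).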


def import_git_objects(repo, mapping, object_iter,
                       target_git_object_retriever, heads, pb=None,
                       limit=None):
    """Import a set of git objects into a bzr repository.

    :param repo: Target Bazaar repository
    :param mapping: Mapping to use
    :param object_iter: Iterator over Git objects.
    :return: Tuple with pack hints and last imported revision id
    """
    def lookup_object(sha):
        try:
            return object_iter[sha]
        except KeyError:
            return target_git_object_retriever[sha]
    graph = []
    checked = set()
    heads = list(set(heads))
    trees_cache = LRUTreeCache(repo)
    # Find and convert commit objects
    while heads:
        if pb is not None:
            pb.update("finding revisions to fetch", len(graph), None)
        head = heads.pop()
        if not isinstance(head, bytes):
            raise TypeError(head)
        try:
            o = lookup_object(head)
        except KeyError:
            continue
        if isinstance(o, Commit):
            rev, roundtrip_revid, verifiers = mapping.import_commit(
                o, mapping.revision_id_foreign_to_bzr, strict=True)
            if (repo.has_revision(rev.revision_id)
                    or (roundtrip_revid and
                        repo.has_revision(roundtrip_revid))):
                continue
            graph.append((o.id, o.parents))
            heads.extend([p for p in o.parents if p not in checked])
        elif isinstance(o, Tag):
            if o.object[1] not in checked:
                heads.append(o.object[1])
        else:
            trace.warning("Unable to import head object %r" % o)
        checked.add(o.id)
    # Order the revisions
    # Create the inventory objects
    batch_size = 1000
    revision_ids = topo_sort(graph)
    pack_hints = []
    if limit is not None:
        revision_ids = revision_ids[:limit]
    last_imported = None
    for offset in range(0, len(revision_ids), batch_size):
        target_git_object_retriever.start_write_group()
        try:
            repo.start_write_group()
            try:
                for i, head in enumerate(
                        revision_ids[offset:offset + batch_size]):
                    if pb is not None:
                        pb.update("fetching revisions", offset + i,
                                  len(revision_ids))
                    import_git_commit(repo, mapping, head, lookup_object,
                                      target_git_object_retriever,
                                      trees_cache, strict=True)
                    last_imported = head
            except BaseException:
                repo.abort_write_group()
                raise
            else:
                hint = repo.commit_write_group()
                if hint is not None:
                    pack_hints.extend(hint)
        except BaseException:
            target_git_object_retriever.abort_write_group()
            raise
        else:
            target_git_object_retriever.commit_write_group()
    return pack_hints, last_imported
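
# Illustrative call sketch (the surrounding objects are assumed, not defined
# in this module):
#
#   pack_hints, last_revid = import_git_objects(
#       bzr_repo, mapping, git_object_store, bzr_git_store,
#       heads=[head_sha], pb=progress_bar)
#
# where git_object_store supports __getitem__ by SHA (for example a dulwich
# object store) and bzr_git_store is the target repository's git object
# retriever with start/commit/abort_write_group methods.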


class DetermineWantsRecorder(object):

    def __init__(self, actual):
        self.actual = actual
        self.wants = []
        self.remote_refs = {}

    def __call__(self, refs):
        if type(refs) is not dict:
            raise TypeError(refs)
        self.remote_refs = refs
        self.wants = self.actual(refs)
        return self.wants
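
# Illustrative usage sketch (caller names are assumptions for illustration):
#
#   recorder = DetermineWantsRecorder(determine_wants)
#   remote.fetch_objects(recorder, graph_walker, progress)
#   recorder.remote_refs   # all refs advertised by the remote
#   recorder.wants         # the SHAs the wrapped callback asked for
#
# i.e. the recorder is passed wherever a plain determine_wants callback is
# expected and records both the advertised refs and the computed wants.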