# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Fetching from git into bzr."""

from __future__ import absolute_import

import posixpath
import stat

from bzrlib import osutils
from bzrlib.errors import InvalidRevisionId
from bzrlib.inventory import Inventory
from bzrlib.repository import InterRepository
from bzrlib.trace import info

from bzrlib.plugins.git import git
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
from bzrlib.plugins.git.remote import RemoteGitRepository

from cStringIO import StringIO

from dulwich.objects import (
    Commit,
    S_IFGITLINK,
    S_ISGITLINK,
    Tag,
    Tree,
    )
from dulwich.object_store import (
    tree_lookup_path,
    )
from dulwich.walk import Walker

from ... import (
    debug,
    errors,
    trace,
    )
from ...errors import (
    BzrError,
    NoSuchRevision,
    )
from ...bzr.inventory import (
    InventoryDirectory,
    InventoryFile,
    InventoryLink,
    TreeReference,
    )
from ...repository import (
    InterRepository,
    )
from ...revision import (
    NULL_REVISION,
    )
from ...sixish import text_type
from ...bzr.inventorytree import InventoryRevisionTree
from ...testament import (
    StrictTestament3,
    )
from ...tsort import (
    topo_sort,
    )
from ...bzr.versionedfile import (
    ChunkedContentFactory,
    )
from .mapping import (
    DEFAULT_FILE_MODE,
    mode_is_executable,
    mode_kind,
    warn_unusual_mode,
    )
from .object_store import (
    LRUTreeCache,
    _tree_to_objects,
    )
from .repository import (
    GitRepository,
    LocalGitRepository,
    )


class BzrFetchGraphWalker(object):
    """Graph walker that reports which revisions the target already has."""

    def __init__(self, repository, mapping):
        self.repository = repository
        self.mapping = mapping
        self.done = set()
        self.parents = {}
        self.heads = set(repository.all_revision_ids())

    def ack(self, sha):
        revid = self.mapping.revision_id_foreign_to_bzr(sha)
        self.remove(revid)

    def remove(self, revid):
        self.done.add(revid)
        if revid in self.heads:
            self.heads.remove(revid)
        if revid in self.parents:
            for p in self.parents[revid]:
                self.remove(p)

    def next(self):
        while self.heads:
            ret = self.heads.pop()
            ps = self.repository.get_parent_map([ret])[ret]
            self.parents[ret] = ps
            self.heads.update([p for p in ps if not p in self.done])
            try:
                return self.mapping.revision_id_bzr_to_foreign(ret)
            except InvalidRevisionId:
                pass
        return None
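

# Illustrative sketch (not part of the plugin): BzrFetchGraphWalker above
# drives git-style have/want negotiation.  The fetcher keeps calling next()
# to advertise revisions the target already has and ack() for each SHA the
# source confirms as common, so already-shared history is pruned from the
# transfer.  The toy walker below imitates that contract on a plain ancestry
# dict; every name in it is hypothetical and only shows the control flow.
def _example_have_want_negotiation():
    ancestry = {b"d": [b"c"], b"c": [b"b"], b"b": [b"a"], b"a": []}
    source_has = {b"a", b"b"}

    class ToyWalker(object):

        def __init__(self, graph):
            self.graph = graph
            self.pending = set(graph)
            self.common = set()

        def next(self):
            return self.pending.pop() if self.pending else None

        def ack(self, rev):
            # Mark the revision and its direct parents as common; a real
            # walker would prune the whole ancestry behind it.
            self.common.add(rev)
            self.pending.difference_update([rev] + self.graph[rev])

    walker = ToyWalker(ancestry)
    rev = walker.next()
    while rev is not None:
        if rev in source_has:
            walker.ack(rev)
        rev = walker.next()
    return walker.common
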
def import_git_blob(texts, mapping, path, name, hexshas,
        base_bzr_tree, parent_id, revision_id,
        parent_bzr_trees, lookup_object, modes, store_updater,
        lookup_file_id):
    """Import a git blob object into a bzr repository.

    :param texts: VersionedFiles to add to
    :param path: Path in the tree
    :param blob: A git blob
    :return: Inventory delta for this file
    """
    (base_mode, mode) = modes
    (base_hexsha, hexsha) = hexshas
    if mapping.is_special_file(path):
        return []
    if base_hexsha == hexsha and base_mode == mode:
        # If nothing has changed since the base revision, we're done
        return []
    file_id = lookup_file_id(osutils.safe_unicode(path))
    if stat.S_ISLNK(mode):
        cls = InventoryLink
    else:
        cls = InventoryFile
    ie = cls(file_id, name.decode("utf-8"), parent_id)
    if ie.kind == "file":
        ie.executable = mode_is_executable(mode)
    if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
        base_exec = base_bzr_tree.is_executable(path)
        if ie.kind == "symlink":
            ie.symlink_target = base_bzr_tree.get_symlink_target(path)
        else:
            ie.text_size = base_bzr_tree.get_file_size(path)
            ie.text_sha1 = base_bzr_tree.get_file_sha1(path)
        if ie.kind == "symlink" or ie.executable == base_exec:
            ie.revision = base_bzr_tree.get_file_revision(path)
        else:
            blob = lookup_object(hexsha)
    else:
        blob = lookup_object(hexsha)
        if ie.kind == "symlink":
            ie.symlink_target = blob.data.decode("utf-8")
        else:
            ie.text_size = sum(map(len, blob.chunked))
            ie.text_sha1 = osutils.sha_strings(blob.chunked)
    # Check what revision we should store
    parent_keys = []
    for ptree in parent_bzr_trees:
        try:
            ppath = ptree.id2path(file_id)
        except errors.NoSuchId:
            continue
        pkind = ptree.kind(ppath, file_id)
        if (pkind == ie.kind and
            ((pkind == "symlink" and ptree.get_symlink_target(ppath, file_id) == ie.symlink_target) or
             (pkind == "file" and ptree.get_file_sha1(ppath, file_id) == ie.text_sha1 and
              ptree.is_executable(ppath, file_id) == ie.executable))):
            # found a revision in one of the parents to use
            ie.revision = ptree.get_file_revision(ppath, file_id)
            break
        parent_key = (file_id, ptree.get_file_revision(ppath, file_id))
        if not parent_key in parent_keys:
            parent_keys.append(parent_key)
    if ie.revision is None:
        # Need to store a new revision
        ie.revision = revision_id
        if ie.revision is None:
            raise ValueError("no file revision set")
        if ie.kind == 'symlink':
            chunks = []
        else:
            chunks = blob.chunked
        texts.insert_record_stream([
            ChunkedContentFactory((file_id, ie.revision),
                tuple(parent_keys), ie.text_sha1, chunks)])
    invdelta = []
    if base_hexsha is not None:
        old_path = path.decode("utf-8")  # Renames are not supported yet
        if stat.S_ISDIR(base_mode):
            invdelta.extend(remove_disappeared_children(base_bzr_tree, old_path,
                lookup_object(base_hexsha), [], lookup_object))
    else:
        old_path = None
    new_path = path.decode("utf-8")
    invdelta.append((old_path, new_path, file_id, ie))
    if base_hexsha != hexsha:
        store_updater.add_object(blob, (ie.file_id, ie.revision), path)
    return invdelta
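

# Illustrative sketch (not part of the import code): an inventory delta is a
# list of (old_path, new_path, file_id, entry) tuples, where old_path is None
# for entries that are new in this revision and entry is None for removals.
# The stand-in record type below is hypothetical; the real entries are the
# InventoryFile/InventoryDirectory/InventoryLink objects built above.
def _example_inventory_delta():
    from collections import namedtuple
    FakeEntry = namedtuple("FakeEntry", ["file_id", "kind", "revision"])
    added = (None, u"docs/README", b"readme-id",
             FakeEntry(b"readme-id", "file", b"rev-1"))
    removed = (u"old.txt", None, b"old-id", None)
    return [added, removed]
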
class SubmodulesRequireSubtrees(BzrError):

    _fmt = ("The repository you are fetching from contains submodules, "
            "which require a Bazaar format that supports tree references.")
def import_git_submodule(texts, mapping, path, name, hexshas,
        base_bzr_tree, parent_id, revision_id, parent_bzr_trees, lookup_object,
        modes, store_updater, lookup_file_id):
    """Import a git submodule."""
    (base_hexsha, hexsha) = hexshas
    (base_mode, mode) = modes
    if base_hexsha == hexsha and base_mode == mode:
        return [], {}
    file_id = lookup_file_id(path)
    invdelta = []
    ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
    ie.revision = revision_id
    if base_hexsha is not None:
        old_path = path.decode("utf-8")  # Renames are not supported yet
        if stat.S_ISDIR(base_mode):
            invdelta.extend(remove_disappeared_children(base_bzr_tree, old_path,
                lookup_object(base_hexsha), [], lookup_object))
    else:
        old_path = None
    ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
    texts.insert_record_stream([
        ChunkedContentFactory((file_id, ie.revision), (), None, [])])
    invdelta.append((old_path, path, file_id, ie))
    return invdelta, {}
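

# Illustrative sketch: submodules appear in a git tree as "gitlink" entries
# with mode 0o160000, which is what dulwich's S_ISGITLINK() tests for; files,
# executables, symlinks and directories use the familiar stat modes.  The
# values below are ordinary git tree-entry modes, shown for reference only.
def _example_git_entry_modes():
    modes = {
        "file": 0o100644,
        "executable": 0o100755,
        "symlink": 0o120000,
        "directory": 0o040000,
        "submodule": 0o160000,
    }
    return {
        "directory_is_dir": stat.S_ISDIR(modes["directory"]),
        "symlink_is_link": stat.S_ISLNK(modes["symlink"]),
        "submodule_is_gitlink": S_ISGITLINK(modes["submodule"]),
    }
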
def remove_disappeared_children(base_bzr_tree, path, base_tree, existing_children,
        lookup_object):
    """Generate an inventory delta for removed children.

    :param base_bzr_tree: Base bzr tree against which to generate the
        inventory delta.
    :param path: Path to process (unicode)
    :param base_tree: Git Tree base object
    :param existing_children: Children that still exist
    :param lookup_object: Lookup a git object by its SHA1
    :return: Inventory delta, as list
    """
    if not isinstance(path, text_type):
        raise TypeError(path)
    ret = []
    for name, mode, hexsha in base_tree.iteritems():
        if name in existing_children:
            continue
        c_path = posixpath.join(path, name.decode("utf-8"))
        file_id = base_bzr_tree.path2id(c_path)
        if file_id is None:
            raise TypeError(file_id)
        ret.append((c_path, None, file_id, None))
        if stat.S_ISDIR(mode):
            ret.extend(remove_disappeared_children(
                base_bzr_tree, c_path, lookup_object(hexsha), [], lookup_object))
    return ret
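

# Illustrative sketch: the function above only needs to know which names from
# the base tree are no longer present.  The plain-dict version below shows the
# same bookkeeping without any bzr or dulwich objects; every name that has
# disappeared becomes a (path, None, file_id, None) removal entry.
def _example_disappeared_children():
    base_children = {u"a.txt": b"id-a", u"b.txt": b"id-b", u"sub": b"id-sub"}
    still_present = {u"a.txt"}
    return [(name, None, file_id, None)
            for name, file_id in sorted(base_children.items())
            if name not in still_present]
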
def import_git_tree(texts, mapping, path, name, hexshas,
        base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
        lookup_object, modes, store_updater,
        lookup_file_id, allow_submodules=False):
    """Import a git tree object into a bzr repository.

    :param texts: VersionedFiles object to add to
    :param path: Path in the tree (str)
    :param name: Name of the tree (str)
    :param tree: A git tree object
    :param base_bzr_tree: Base inventory against which to return inventory delta
    :return: Inventory delta for this subtree
    """
    (base_hexsha, hexsha) = hexshas
    (base_mode, mode) = modes
    if not isinstance(path, bytes):
        raise TypeError(path)
    if not isinstance(name, bytes):
        raise TypeError(name)
    if base_hexsha == hexsha and base_mode == mode:
        # If nothing has changed since the base revision, we're done
        return [], {}
    invdelta = []
    file_id = lookup_file_id(osutils.safe_unicode(path))
    # We just have to hope this is indeed utf-8:
    ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
    tree = lookup_object(hexsha)
    if base_hexsha is None:
        base_tree = None
        old_path = None  # Newly appeared here
    else:
        base_tree = lookup_object(base_hexsha)
        old_path = path.decode("utf-8")  # Renames aren't supported yet
    new_path = path.decode("utf-8")
    if base_tree is None or type(base_tree) is not Tree:
        ie.revision = revision_id
        invdelta.append((old_path, new_path, ie.file_id, ie))
        texts.insert_record_stream([
            ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
    # Remember for next time
    existing_children = set()
    child_modes = {}
    for name, child_mode, child_hexsha in tree.iteritems():
        existing_children.add(name)
        child_path = posixpath.join(path, name)
        if type(base_tree) is Tree:
            try:
                child_base_mode, child_base_hexsha = base_tree[name]
            except KeyError:
                child_base_hexsha = None
                child_base_mode = 0
        else:
            child_base_hexsha = None
            child_base_mode = 0
        if stat.S_ISDIR(child_mode):
            subinvdelta, grandchildmodes = import_git_tree(texts, mapping,
                child_path, name, (child_base_hexsha, child_hexsha),
                base_bzr_tree, file_id, revision_id, parent_bzr_trees,
                lookup_object, (child_base_mode, child_mode), store_updater,
                lookup_file_id, allow_submodules=allow_submodules)
        elif S_ISGITLINK(child_mode):  # submodule
            if not allow_submodules:
                raise SubmodulesRequireSubtrees()
            subinvdelta, grandchildmodes = import_git_submodule(texts, mapping,
                child_path, name, (child_base_hexsha, child_hexsha),
                base_bzr_tree, file_id, revision_id, parent_bzr_trees,
                lookup_object, (child_base_mode, child_mode), store_updater,
                lookup_file_id)
        else:
            if not mapping.is_special_file(name):
                subinvdelta = import_git_blob(texts, mapping, child_path, name,
                    (child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
                    revision_id, parent_bzr_trees, lookup_object,
                    (child_base_mode, child_mode), store_updater, lookup_file_id)
            else:
                subinvdelta = []
            grandchildmodes = {}
        child_modes.update(grandchildmodes)
        invdelta.extend(subinvdelta)
        if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
                              stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
                              S_IFGITLINK):
            child_modes[child_path] = child_mode
    # Remove any children that have disappeared
    if base_tree is not None and type(base_tree) is Tree:
        invdelta.extend(remove_disappeared_children(base_bzr_tree, old_path,
            base_tree, existing_children, lookup_object))
    store_updater.add_object(tree, (file_id, revision_id), path)
    return invdelta, child_modes
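

# Illustrative sketch: the early return at the top of import_git_tree() (and
# import_git_blob()) relies on git's content addressing -- two tree objects
# with identical entries always have identical SHA1s, so an unchanged hexsha
# plus an unchanged mode means the whole subtree can be skipped.  This uses
# only dulwich's in-memory objects and never touches a repository.
def _example_tree_sha_identity():
    from dulwich.objects import Blob, Tree
    blob = Blob.from_string(b"hello\n")
    tree_a = Tree()
    tree_a.add(b"hello.txt", 0o100644, blob.id)
    tree_b = Tree()
    tree_b.add(b"hello.txt", 0o100644, blob.id)
    return tree_a.id == tree_b.id  # True: same entries, same object id
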
def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
        o, rev, ret_tree, parent_trees, mapping, unusual_modes, verifiers):
    new_unusual_modes = mapping.export_unusual_file_modes(rev)
    if new_unusual_modes != unusual_modes:
        raise AssertionError("unusual modes don't match: %r != %r" % (
            unusual_modes, new_unusual_modes))
    # Verify that we can reconstruct the commit properly
    rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
        verifiers)
    if rec_o != o:
        raise AssertionError("Reconstructed commit differs: %r != %r" % (
            o, rec_o))
    diff = []
    new_objs = {}
    for path, obj, ie in _tree_to_objects(ret_tree, parent_trees,
            target_git_object_retriever._cache.idmap, unusual_modes,
            mapping.BZR_DUMMY_FILE):
        old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
        new_objs[path] = obj
        if obj.id != old_obj_id:
            diff.append((path, lookup_object(old_obj_id), obj))
    for (path, old_obj, new_obj) in diff:
        while (old_obj.type_name == "tree" and
               new_obj.type_name == "tree" and
               sorted(old_obj) == sorted(new_obj)):
            for name in old_obj:
                if old_obj[name][0] != new_obj[name][0]:
                    raise AssertionError("Modes for %s differ: %o != %o" %
                        (path, old_obj[name][0], new_obj[name][0]))
                if old_obj[name][1] != new_obj[name][1]:
                    # Found a differing child, delve deeper
                    path = posixpath.join(path, name)
                    old_obj = lookup_object(old_obj[name][1])
                    new_obj = new_objs[path]
                    break
        raise AssertionError("objects differ for %s: %r != %r" % (path,
            old_obj, new_obj))
def ensure_inventories_in_repo(repo, trees):
    real_inv_vf = repo.inventories.without_fallbacks()
    for t in trees:
        revid = t.get_revision_id()
        if not real_inv_vf.get_parent_map([(revid, )]):
            repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())
def import_git_commit(repo, mapping, head, lookup_object,
                      target_git_object_retriever, trees_cache):
    o = lookup_object(head)
    # Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
    # were bzr roundtripped revisions they would be specified in the
    # roundtrip data.
    rev, roundtrip_revid, verifiers = mapping.import_commit(
        o, mapping.revision_id_foreign_to_bzr)
    if roundtrip_revid is not None:
        original_revid = rev.revision_id
        rev.revision_id = roundtrip_revid
    # We have to do this here, since we have to walk the tree and
    # we need to make sure to import the blobs / trees with the right
    # path; this may involve adding them more than once.
    parent_trees = trees_cache.revision_trees(rev.parent_ids)
    ensure_inventories_in_repo(repo, parent_trees)
    if parent_trees == []:
        base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
        base_tree = None
        base_mode = None
    else:
        base_bzr_tree = parent_trees[0]
        base_tree = lookup_object(o.parents[0]).tree
        base_mode = stat.S_IFDIR
    store_updater = target_git_object_retriever._get_updater(rev)
    tree_supplement = mapping.get_fileid_map(lookup_object, o.tree)
    inv_delta, unusual_modes = import_git_tree(repo.texts,
        mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
        None, rev.revision_id, parent_trees,
        lookup_object, (base_mode, stat.S_IFDIR), store_updater,
        tree_supplement.lookup_file_id,
        allow_submodules=repo._format.supports_tree_reference)
    if unusual_modes != {}:
        for path, mode in unusual_modes.iteritems():
            warn_unusual_mode(rev.foreign_revid, path, mode)
        mapping.import_unusual_file_modes(rev, unusual_modes)
    try:
        basis_id = rev.parent_ids[0]
    except IndexError:
        basis_id = NULL_REVISION
        base_bzr_inventory = None
    else:
        base_bzr_inventory = base_bzr_tree.root_inventory
    rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
        inv_delta, rev.revision_id, rev.parent_ids,
        base_bzr_inventory)
    ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
    # Check verifiers
    if verifiers and roundtrip_revid is not None:
        testament = StrictTestament3(rev, ret_tree)
        calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
        if calculated_verifiers != verifiers:
            trace.mutter("Testament SHA1 %r for %r did not match %r.",
                         calculated_verifiers["testament3-sha1"],
                         rev.revision_id, verifiers["testament3-sha1"])
            rev.revision_id = original_revid
            rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
                inv_delta, rev.revision_id, rev.parent_ids, base_bzr_inventory)
            ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
    else:
        calculated_verifiers = {}
    store_updater.add_object(o, calculated_verifiers, None)
    store_updater.finish()
    trees_cache.add(ret_tree)
    repo.add_revision(rev.revision_id, rev)
    if "verify" in debug.debug_flags:
        verify_commit_reconstruction(target_git_object_retriever,
            lookup_object, o, rev, ret_tree, parent_trees, mapping,
            unusual_modes, verifiers)
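

# Illustrative sketch: the mapping converts between git SHA1s and bzr revision
# ids in both directions; import_commit() above additionally reports a
# "roundtrip" revision id when the commit carries bzr metadata.  The toy
# mapping below only demonstrates the id conversion -- the "git-v1:" prefix
# mirrors the usual bzr-git scheme, but the class itself is hypothetical.
def _example_id_mapping():
    class ToyMapping(object):
        prefix = b"git-v1:"

        def revision_id_foreign_to_bzr(self, sha):
            return self.prefix + sha

        def revision_id_bzr_to_foreign(self, revid):
            if not revid.startswith(self.prefix):
                raise ValueError("not a mapped git revision: %r" % revid)
            return revid[len(self.prefix):]

    mapping = ToyMapping()
    sha = b"a" * 40
    revid = mapping.revision_id_foreign_to_bzr(sha)
    return mapping.revision_id_bzr_to_foreign(revid) == sha
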
def import_git_objects(repo, mapping, object_iter,
        target_git_object_retriever, heads, pb=None, limit=None):
    """Import a set of git objects into a bzr repository.

    :param repo: Target Bazaar repository
    :param mapping: Mapping to use
    :param object_iter: Iterator over Git objects.
    :return: Tuple with pack hints and last imported revision id
    """
    def lookup_object(sha):
        try:
            return object_iter[sha]
        except KeyError:
            return target_git_object_retriever[sha]
    graph = []
    checked = set()
    heads = list(set(heads))
    trees_cache = LRUTreeCache(repo)
    # Find and convert commit objects
    while heads:
        if pb is not None:
            pb.update("finding revisions to fetch", len(graph), None)
        head = heads.pop()
        if not isinstance(head, bytes):
            raise TypeError(head)
        try:
            o = lookup_object(head)
        except KeyError:
            continue
        if isinstance(o, Commit):
            rev, roundtrip_revid, verifiers = mapping.import_commit(o,
                mapping.revision_id_foreign_to_bzr)
            if (repo.has_revision(rev.revision_id) or
                    (roundtrip_revid and repo.has_revision(roundtrip_revid))):
                continue
            graph.append((o.id, o.parents))
            heads.extend([p for p in o.parents if p not in checked])
        elif isinstance(o, Tag):
            if o.object[1] not in checked:
                heads.append(o.object[1])
        else:
            trace.warning("Unable to import head object %r" % o)
        checked.add(o.id)
    del checked
    # Order the revisions
    # Create the inventory objects
    batch_size = 1000
    revision_ids = topo_sort(graph)
    pack_hints = []
    if limit is not None:
        revision_ids = revision_ids[:limit]
    last_imported = None
    for offset in range(0, len(revision_ids), batch_size):
        target_git_object_retriever.start_write_group()
        try:
            repo.start_write_group()
            try:
                for i, head in enumerate(
                        revision_ids[offset:offset + batch_size]):
                    if pb is not None:
                        pb.update("fetching revisions", offset + i,
                                  len(revision_ids))
                    import_git_commit(repo, mapping, head, lookup_object,
                                      target_git_object_retriever, trees_cache)
                    last_imported = head
            except:
                repo.abort_write_group()
                raise
            else:
                hint = repo.commit_write_group()
                if hint is not None:
                    pack_hints.extend(hint)
        except:
            target_git_object_retriever.abort_write_group()
            raise
        else:
            target_git_object_retriever.commit_write_group()
    return pack_hints, last_imported


def reconstruct_git_commit(repo, rev):
    raise NotImplementedError(reconstruct_git_commit)


def reconstruct_git_object(repo, mapping, sha):
    try:
        revid = mapping.revision_id_foreign_to_bzr(sha)
        rev = repo.get_revision(revid)
    except NoSuchRevision:
        pass
    else:
        return reconstruct_git_commit(repo, rev)
    raise KeyError("No such object %s" % sha)


class InterGitRepository(InterRepository):

    _matching_repo_format = GitFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        self.fetch(revision_id, pb, find_ghosts=False)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
              fetch_spec=None):
        mapping = self.source.get_mapping()

        def progress(text):
            if pb is not None:
                pb.note("git: %s" % text)
            else:
                info("git: %s" % text)

        def determine_wants(heads):
            if revision_id is None:
                ret = heads.values()
            else:
                ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
            return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]

        graph_walker = BzrFetchGraphWalker(self.target, mapping)
        self.target.lock_write()
        try:
            import_git_objects(self.target, mapping,
                self.source.fetch_objects(determine_wants, graph_walker,
                    progress))
        finally:
            self.target.unlock()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with GitRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, LocalGitRepository) and
                target.supports_rich_root())
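

# Illustrative sketch: import_git_objects() collects (commit_id, parents)
# pairs and relies on topo_sort() to order them so every parent is imported
# before its children.  The tiny depth-first version below demonstrates that
# property on plain data; it is not the breezy implementation.
def _example_topo_order():
    graph = {b"c3": [b"c2"], b"c2": [b"c1"], b"c1": []}
    order = []
    seen = set()

    def visit(node):
        if node in seen:
            return
        seen.add(node)
        for parent in graph.get(node, []):
            visit(parent)
        order.append(node)

    for node in graph:
        visit(node)
    return order  # parents always precede children, e.g. [c1, c2, c3]
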
class DetermineWantsRecorder(object):

    def __init__(self, actual):
        self.actual = actual
        self.wants = []
        self.remote_refs = {}

    def __call__(self, refs):
        if type(refs) is not dict:
            raise TypeError(refs)
        self.remote_refs = refs
        self.wants = self.actual(refs)
        return self.wants
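

# Illustrative sketch: the recorder wraps an existing determine_wants callback
# so the advertised refs and the computed wants can be inspected after the
# fetch.  This uses only the class defined above with a stand-in callback.
def _example_determine_wants_recorder():
    def want_everything(refs):
        return list(refs.values())

    recorder = DetermineWantsRecorder(want_everything)
    wants = recorder({b"refs/heads/master": b"a" * 40})
    return recorder.remote_refs, wants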