13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
23
from bzrlib.plugins.git import git
24
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
25
from bzrlib.plugins.git.remote import RemoteGitRepository
27
from dulwich.objects import Commit
29
from cStringIO import StringIO
32
class BzrFetchGraphWalker(object):
34
def __init__(self, repository, mapping):
35
self.repository = repository
36
self.mapping = mapping
38
self.heads = set(repository.all_revision_ids())
42
revid = self.mapping.revision_id_foreign_to_bzr(sha)
45
def remove(self, revid):
48
self.heads.remove(revid)
49
if revid in self.parents:
50
for p in self.parents[revid]:
55
ret = self.heads.pop()
56
ps = self.repository.get_parent_map([ret])[ret]
57
self.parents[ret] = ps
58
self.heads.update([p for p in ps if not p in self.done])
61
return self.mapping.revision_id_bzr_to_foreign(ret)
62
except InvalidRevisionId:
67
def import_git_blob(repo, mapping, path, blob):
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Fetching from git into bzr."""
19
from dulwich.objects import (
27
from dulwich.object_store import (
39
from ..errors import (
42
from ..bzr.inventory import (
48
from ..revision import (
51
from ..bzr.inventorytree import InventoryRevisionTree
52
from ..bzr.testament import (
55
from ..tree import InterTree
59
from ..bzr.versionedfile import (
60
ChunkedContentFactory,
63
from .mapping import (
69
from .object_store import (
75
def import_git_blob(texts, mapping, path, name, hexshas,
76
base_bzr_tree, parent_id, revision_id,
77
parent_bzr_trees, lookup_object, modes, store_updater,
68
79
"""Import a git blob object into a bzr repository.
70
:param repo: bzr repository
81
:param texts: VersionedFiles to add to
71
82
:param path: Path in the tree
72
83
:param blob: A git blob
74
file_id = mapping.generate_file_id(path)
75
repo.texts.add_lines((file_id, blob.id),
77
osutils.split_lines(blob.data))
78
inv.add_path(path, "file", file_id)
81
def import_git_tree(repo, mapping, path, tree, inv, lookup_object):
84
:return: Inventory delta for this file
86
if not isinstance(path, bytes):
88
decoded_path = path.decode('utf-8')
89
(base_mode, mode) = modes
90
(base_hexsha, hexsha) = hexshas
91
if mapping.is_special_file(path):
93
if base_hexsha == hexsha and base_mode == mode:
94
# If nothing has changed since the base revision, we're done
96
file_id = lookup_file_id(decoded_path)
97
if stat.S_ISLNK(mode):
101
ie = cls(file_id, name.decode("utf-8"), parent_id)
102
if ie.kind == "file":
103
ie.executable = mode_is_executable(mode)
104
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
105
base_exec = base_bzr_tree.is_executable(decoded_path)
106
if ie.kind == "symlink":
107
ie.symlink_target = base_bzr_tree.get_symlink_target(decoded_path)
109
ie.text_size = base_bzr_tree.get_file_size(decoded_path)
110
ie.text_sha1 = base_bzr_tree.get_file_sha1(decoded_path)
111
if ie.kind == "symlink" or ie.executable == base_exec:
112
ie.revision = base_bzr_tree.get_file_revision(decoded_path)
114
blob = lookup_object(hexsha)
116
blob = lookup_object(hexsha)
117
if ie.kind == "symlink":
119
ie.symlink_target = blob.data.decode("utf-8")
121
ie.text_size = sum(map(len, blob.chunked))
122
ie.text_sha1 = osutils.sha_strings(blob.chunked)
123
# Check what revision we should store
125
for ptree in parent_bzr_trees:
126
intertree = InterTree.get(ptree, base_bzr_tree)
128
ppath = intertree.find_source_paths(decoded_path, recurse='none')
129
except errors.NoSuchFile:
133
pkind = ptree.kind(ppath)
134
if (pkind == ie.kind and
135
((pkind == "symlink" and ptree.get_symlink_target(ppath) == ie.symlink_target) or
136
(pkind == "file" and ptree.get_file_sha1(ppath) == ie.text_sha1 and
137
ptree.is_executable(ppath) == ie.executable))):
138
# found a revision in one of the parents to use
139
ie.revision = ptree.get_file_revision(ppath)
141
parent_key = (file_id, ptree.get_file_revision(ppath))
142
if parent_key not in parent_keys:
143
parent_keys.append(parent_key)
144
if ie.revision is None:
145
# Need to store a new revision
146
ie.revision = revision_id
147
if ie.revision is None:
148
raise ValueError("no file revision set")
149
if ie.kind == 'symlink':
152
chunks = blob.chunked
153
texts.insert_record_stream([
154
ChunkedContentFactory((file_id, ie.revision),
155
tuple(parent_keys), ie.text_sha1, chunks)])
157
if base_hexsha is not None:
158
old_path = decoded_path # Renames are not supported yet
159
if stat.S_ISDIR(base_mode):
160
invdelta.extend(remove_disappeared_children(
161
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
165
invdelta.append((old_path, decoded_path, file_id, ie))
166
if base_hexsha != hexsha:
167
store_updater.add_object(blob, (ie.file_id, ie.revision), path)
171
class SubmodulesRequireSubtrees(BzrError):
172
_fmt = ("The repository you are fetching from contains submodules, "
173
"which require a Bazaar format that supports tree references.")
177
def import_git_submodule(texts, mapping, path, name, hexshas,
178
base_bzr_tree, parent_id, revision_id,
179
parent_bzr_trees, lookup_object,
180
modes, store_updater, lookup_file_id):
181
"""Import a git submodule."""
182
(base_hexsha, hexsha) = hexshas
183
(base_mode, mode) = modes
184
if base_hexsha == hexsha and base_mode == mode:
186
path = path.decode('utf-8')
187
file_id = lookup_file_id(path)
189
ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
190
ie.revision = revision_id
191
if base_hexsha is not None:
192
old_path = path # Renames are not supported yet
193
if stat.S_ISDIR(base_mode):
194
invdelta.extend(remove_disappeared_children(
195
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
199
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
200
texts.insert_record_stream([
201
ChunkedContentFactory((file_id, ie.revision), (), None, [])])
202
invdelta.append((old_path, path, file_id, ie))
206
def remove_disappeared_children(base_bzr_tree, path, base_tree,
207
existing_children, lookup_object):
208
"""Generate an inventory delta for removed children.
210
:param base_bzr_tree: Base bzr tree against which to generate the
212
:param path: Path to process (unicode)
213
:param base_tree: Git Tree base object
214
:param existing_children: Children that still exist
215
:param lookup_object: Lookup a git object by its SHA1
216
:return: Inventory delta, as list
218
if not isinstance(path, str):
219
raise TypeError(path)
221
for name, mode, hexsha in base_tree.iteritems():
222
if name in existing_children:
224
c_path = posixpath.join(path, name.decode("utf-8"))
225
file_id = base_bzr_tree.path2id(c_path)
227
raise TypeError(file_id)
228
ret.append((c_path, None, file_id, None))
229
if stat.S_ISDIR(mode):
230
ret.extend(remove_disappeared_children(
231
base_bzr_tree, c_path, lookup_object(hexsha), [],
236
def import_git_tree(texts, mapping, path, name, hexshas,
237
base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
238
lookup_object, modes, store_updater,
239
lookup_file_id, allow_submodules=False):
82
240
"""Import a git tree object into a bzr repository.
84
:param repo: A Bzr repository object
85
:param path: Path in the tree
242
:param texts: VersionedFiles object to add to
243
:param path: Path in the tree (str)
244
:param name: Name of the tree (str)
86
245
:param tree: A git tree object
87
:param inv: Inventory object
246
:param base_bzr_tree: Base inventory against which to return inventory
248
:return: Inventory delta for this subtree
89
file_id = mapping.generate_file_id(path)
90
repo.texts.add_lines((file_id, tree.id),
93
inv.add_path(path, "directory", file_id)
94
for mode, name, hexsha in tree.entries():
95
entry_kind = (mode & 0700000) / 0100000
96
basename = name.decode("utf-8")
100
child_path = urlutils.join(path, name)
102
import_git_tree(repo, mapping, child_path, lookup_object, inv)
103
elif entry_kind == 1:
104
import_git_blob(repo, mapping, child_path, lookup_object, inv)
106
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
109
def import_git_objects(repo, mapping, object_iter):
250
(base_hexsha, hexsha) = hexshas
251
(base_mode, mode) = modes
252
if not isinstance(path, bytes):
253
raise TypeError(path)
254
if not isinstance(name, bytes):
255
raise TypeError(name)
256
if base_hexsha == hexsha and base_mode == mode:
257
# If nothing has changed since the base revision, we're done
260
file_id = lookup_file_id(osutils.safe_unicode(path))
261
# We just have to hope this is indeed utf-8:
262
ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
263
tree = lookup_object(hexsha)
264
if base_hexsha is None:
266
old_path = None # Newly appeared here
268
base_tree = lookup_object(base_hexsha)
269
old_path = path.decode("utf-8") # Renames aren't supported yet
270
new_path = path.decode("utf-8")
271
if base_tree is None or type(base_tree) is not Tree:
272
ie.revision = revision_id
273
invdelta.append((old_path, new_path, ie.file_id, ie))
274
texts.insert_record_stream([
275
ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
276
# Remember for next time
277
existing_children = set()
279
for name, child_mode, child_hexsha in tree.iteritems():
280
existing_children.add(name)
281
child_path = posixpath.join(path, name)
282
if type(base_tree) is Tree:
284
child_base_mode, child_base_hexsha = base_tree[name]
286
child_base_hexsha = None
289
child_base_hexsha = None
291
if stat.S_ISDIR(child_mode):
292
subinvdelta, grandchildmodes = import_git_tree(
293
texts, mapping, child_path, name,
294
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
295
revision_id, parent_bzr_trees, lookup_object,
296
(child_base_mode, child_mode), store_updater, lookup_file_id,
297
allow_submodules=allow_submodules)
298
elif S_ISGITLINK(child_mode): # submodule
299
if not allow_submodules:
300
raise SubmodulesRequireSubtrees()
301
subinvdelta, grandchildmodes = import_git_submodule(
302
texts, mapping, child_path, name,
303
(child_base_hexsha, child_hexsha),
304
base_bzr_tree, file_id, revision_id, parent_bzr_trees,
305
lookup_object, (child_base_mode, child_mode), store_updater,
308
if not mapping.is_special_file(name):
309
subinvdelta = import_git_blob(
310
texts, mapping, child_path, name,
311
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
312
revision_id, parent_bzr_trees, lookup_object,
313
(child_base_mode, child_mode), store_updater,
318
child_modes.update(grandchildmodes)
319
invdelta.extend(subinvdelta)
320
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
321
stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
323
child_modes[child_path] = child_mode
324
# Remove any children that have disappeared
325
if base_tree is not None and type(base_tree) is Tree:
326
invdelta.extend(remove_disappeared_children(
327
base_bzr_tree, old_path, base_tree, existing_children,
329
store_updater.add_object(tree, (file_id, revision_id), path)
330
return invdelta, child_modes
333
def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
334
o, rev, ret_tree, parent_trees, mapping,
335
unusual_modes, verifiers):
336
new_unusual_modes = mapping.export_unusual_file_modes(rev)
337
if new_unusual_modes != unusual_modes:
338
raise AssertionError("unusual modes don't match: %r != %r" % (
339
unusual_modes, new_unusual_modes))
340
# Verify that we can reconstruct the commit properly
341
rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
344
raise AssertionError("Reconstructed commit differs: %r != %r" % (
348
for path, obj, ie in _tree_to_objects(
349
ret_tree, parent_trees, target_git_object_retriever._cache.idmap,
350
unusual_modes, mapping.BZR_DUMMY_FILE):
351
old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
353
if obj.id != old_obj_id:
354
diff.append((path, lookup_object(old_obj_id), obj))
355
for (path, old_obj, new_obj) in diff:
356
while (old_obj.type_name == "tree"
357
and new_obj.type_name == "tree"
358
and sorted(old_obj) == sorted(new_obj)):
360
if old_obj[name][0] != new_obj[name][0]:
361
raise AssertionError(
362
"Modes for %s differ: %o != %o" %
363
(path, old_obj[name][0], new_obj[name][0]))
364
if old_obj[name][1] != new_obj[name][1]:
365
# Found a differing child, delve deeper
366
path = posixpath.join(path, name)
367
old_obj = lookup_object(old_obj[name][1])
368
new_obj = new_objs[path]
370
raise AssertionError(
371
"objects differ for %s: %r != %r" % (path, old_obj, new_obj))
374
def ensure_inventories_in_repo(repo, trees):
375
real_inv_vf = repo.inventories.without_fallbacks()
377
revid = t.get_revision_id()
378
if not real_inv_vf.get_parent_map([(revid, )]):
379
repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())
382
def import_git_commit(repo, mapping, head, lookup_object,
383
target_git_object_retriever, trees_cache, strict):
384
o = lookup_object(head)
385
# Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
386
# were bzr roundtripped revisions they would be specified in the
388
rev, roundtrip_revid, verifiers = mapping.import_commit(
389
o, mapping.revision_id_foreign_to_bzr, strict)
390
if roundtrip_revid is not None:
391
original_revid = rev.revision_id
392
rev.revision_id = roundtrip_revid
393
# We have to do this here, since we have to walk the tree and
394
# we need to make sure to import the blobs / trees with the right
395
# path; this may involve adding them more than once.
396
parent_trees = trees_cache.revision_trees(rev.parent_ids)
397
ensure_inventories_in_repo(repo, parent_trees)
398
if parent_trees == []:
399
base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
403
base_bzr_tree = parent_trees[0]
404
base_tree = lookup_object(o.parents[0]).tree
405
base_mode = stat.S_IFDIR
406
store_updater = target_git_object_retriever._get_updater(rev)
407
inv_delta, unusual_modes = import_git_tree(
408
repo.texts, mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
409
None, rev.revision_id, parent_trees, lookup_object,
410
(base_mode, stat.S_IFDIR), store_updater,
411
mapping.generate_file_id,
412
allow_submodules=repo._format.supports_tree_reference)
413
if unusual_modes != {}:
414
for path, mode in unusual_modes.iteritems():
415
warn_unusual_mode(rev.foreign_revid, path, mode)
416
mapping.import_unusual_file_modes(rev, unusual_modes)
418
basis_id = rev.parent_ids[0]
420
basis_id = NULL_REVISION
421
base_bzr_inventory = None
423
base_bzr_inventory = base_bzr_tree.root_inventory
424
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
425
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
427
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
429
if verifiers and roundtrip_revid is not None:
430
testament = StrictTestament3(rev, ret_tree)
431
calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
432
if calculated_verifiers != verifiers:
433
trace.mutter("Testament SHA1 %r for %r did not match %r.",
434
calculated_verifiers["testament3-sha1"],
435
rev.revision_id, verifiers["testament3-sha1"])
436
rev.revision_id = original_revid
437
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
438
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
440
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
442
calculated_verifiers = {}
443
store_updater.add_object(o, calculated_verifiers, None)
444
store_updater.finish()
445
trees_cache.add(ret_tree)
446
repo.add_revision(rev.revision_id, rev)
447
if "verify" in debug.debug_flags:
448
verify_commit_reconstruction(
449
target_git_object_retriever, lookup_object, o, rev, ret_tree,
450
parent_trees, mapping, unusual_modes, verifiers)
453
def import_git_objects(repo, mapping, object_iter,
454
target_git_object_retriever, heads, pb=None,
110
456
"""Import a set of git objects into a bzr repository.
112
:param repo: Bazaar repository
458
:param repo: Target Bazaar repository
113
459
:param mapping: Mapping to use
114
460
:param object_iter: Iterator over Git objects.
461
:return: Tuple with pack hints and last imported revision id
116
# TODO: a more (memory-)efficient implementation of this
118
for o in object_iter:
463
def lookup_object(sha):
465
return object_iter[sha]
467
return target_git_object_retriever[sha]
470
heads = list(set(heads))
471
trees_cache = LRUTreeCache(repo)
121
472
# Find and convert commit objects
122
for o in objects.iterkeys():
475
pb.update("finding revisions to fetch", len(graph), None)
479
if not isinstance(head, bytes):
480
raise TypeError(head)
482
o = lookup_object(head)
123
485
if isinstance(o, Commit):
124
rev = mapping.import_commit(o)
125
root_trees[rev] = objects[o.tree_sha]
486
rev, roundtrip_revid, verifiers = mapping.import_commit(
487
o, mapping.revision_id_foreign_to_bzr, strict=True)
488
if (repo.has_revision(rev.revision_id)
489
or (roundtrip_revid and
490
repo.has_revision(roundtrip_revid))):
492
graph.append((o.id, o.parents))
493
heads.extend([p for p in o.parents if p not in checked])
494
elif isinstance(o, Tag):
495
if o.object[1] not in checked:
496
heads.append(o.object[1])
498
trace.warning("Unable to import head object %r" % o)
501
# Order the revisions
126
502
# Create the inventory objects
127
for rev, root_tree in root_trees.iteritems():
128
# We have to do this here, since we have to walk the tree and
129
# we need to make sure to import the blobs / trees with the riht
130
# path; this may involve adding them more than once.
132
def lookup_object(sha):
135
return reconstruct_git_object(repo, mapping, sha)
136
import_git_tree(repo, mapping, "", tree, inv, lookup_object)
137
repo.add_revision(rev.revision_id, rev, inv)
140
def reconstruct_git_commit(repo, rev):
141
raise NotImplementedError(self.reconstruct_git_commit)
144
def reconstruct_git_object(repo, mapping, sha):
146
revid = mapping.revision_id_foreign_to_bzr(sha)
148
rev = repo.get_revision(revid)
149
except NoSuchRevision:
152
return reconstruct_git_commit(rev)
156
raise KeyError("No such object %s" % sha)
159
class InterGitRepository(InterRepository):
161
_matching_repo_format = GitFormat()
164
def _get_repo_format_to_test():
167
def copy_content(self, revision_id=None, pb=None):
168
"""See InterRepository.copy_content."""
169
self.fetch(revision_id, pb, find_ghosts=False)
171
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
174
mapping = self.source.get_mapping()
177
pb.note("git: %s" % text)
179
info("git: %s" % text)
180
def determine_wants(heads):
181
if revision_id is None:
184
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
185
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
186
graph_walker = BzrFetchGraphWalker(self.target, mapping)
187
self.target.lock_write()
504
revision_ids = topo_sort(graph)
506
if limit is not None:
507
revision_ids = revision_ids[:limit]
509
for offset in range(0, len(revision_ids), batch_size):
510
target_git_object_retriever.start_write_group()
189
import_git_objects(self.target, mapping,
190
self.source.fetch_objects(determine_wants, graph_walker,
196
def is_compatible(source, target):
197
"""Be compatible with GitRepository."""
198
# FIXME: Also check target uses VersionedFile
199
return (isinstance(source, LocalGitRepository) and
200
target.supports_rich_root())
512
repo.start_write_group()
514
for i, head in enumerate(
515
revision_ids[offset:offset + batch_size]):
517
pb.update("fetching revisions", offset + i,
519
import_git_commit(repo, mapping, head, lookup_object,
520
target_git_object_retriever, trees_cache,
523
except BaseException:
524
repo.abort_write_group()
527
hint = repo.commit_write_group()
529
pack_hints.extend(hint)
530
except BaseException:
531
target_git_object_retriever.abort_write_group()
534
target_git_object_retriever.commit_write_group()
535
return pack_hints, last_imported
538
class DetermineWantsRecorder(object):
540
def __init__(self, actual):
543
self.remote_refs = {}
545
def __call__(self, refs):
546
if type(refs) is not dict:
547
raise TypeError(refs)
548
self.remote_refs = refs
549
self.wants = self.actual(refs)