13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils, ui, urlutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
22
from bzrlib.tsort import topo_sort
24
from bzrlib.plugins.git import git
25
from bzrlib.plugins.git.repository import (
30
from bzrlib.plugins.git.remote import RemoteGitRepository
32
from dulwich.objects import Commit
34
from cStringIO import StringIO
37
class BzrFetchGraphWalker(object):
39
def __init__(self, repository, mapping):
40
self.repository = repository
41
self.mapping = mapping
43
self.heads = set(repository.all_revision_ids())
47
revid = self.mapping.revision_id_foreign_to_bzr(sha)
50
def remove(self, revid):
53
self.heads.remove(revid)
54
if revid in self.parents:
55
for p in self.parents[revid]:
60
ret = self.heads.pop()
61
ps = self.repository.get_parent_map([ret])[ret]
62
self.parents[ret] = ps
63
self.heads.update([p for p in ps if not p in self.done])
66
return self.mapping.revision_id_bzr_to_foreign(ret)
67
except InvalidRevisionId:
72
def import_git_blob(repo, mapping, path, blob, inv, parent_invs, executable):
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Fetching from git into bzr."""
19
from dulwich.objects import (
27
from dulwich.object_store import (
39
from ..errors import (
42
from ..bzr.inventory import (
48
from ..revision import (
51
from ..bzr.inventorytree import InventoryRevisionTree
52
from ..bzr.testament import (
55
from ..tree import InterTree
59
from ..bzr.versionedfile import (
60
ChunkedContentFactory,
63
from .mapping import (
70
from .object_store import (
76
def import_git_blob(texts, mapping, path, name, hexshas,
77
base_bzr_tree, parent_id, revision_id,
78
parent_bzr_trees, lookup_object, modes, store_updater,
73
80
"""Import a git blob object into a bzr repository.
75
:param repo: bzr repository
82
:param texts: VersionedFiles to add to
76
83
:param path: Path in the tree
77
84
:param blob: A git blob
79
file_id = mapping.generate_file_id(path)
80
text_revision = inv.revision_id
81
repo.texts.add_lines((file_id, text_revision),
82
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
83
osutils.split_lines(blob.data))
84
ie = inv.add_path(path, "file", file_id)
85
ie.revision = text_revision
86
ie.text_size = len(blob.data)
87
ie.text_sha1 = osutils.sha_string(blob.data)
88
ie.executable = executable
91
def import_git_tree(repo, mapping, path, tree, inv, parent_invs, lookup_object):
85
:return: Inventory delta for this file
87
if not isinstance(path, bytes):
89
decoded_path = decode_git_path(path)
90
(base_mode, mode) = modes
91
(base_hexsha, hexsha) = hexshas
92
if mapping.is_special_file(path):
94
if base_hexsha == hexsha and base_mode == mode:
95
# If nothing has changed since the base revision, we're done
97
file_id = lookup_file_id(decoded_path)
98
if stat.S_ISLNK(mode):
102
ie = cls(file_id, decode_git_path(name), parent_id)
103
if ie.kind == "file":
104
ie.executable = mode_is_executable(mode)
105
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
106
base_exec = base_bzr_tree.is_executable(decoded_path)
107
if ie.kind == "symlink":
108
ie.symlink_target = base_bzr_tree.get_symlink_target(decoded_path)
110
ie.text_size = base_bzr_tree.get_file_size(decoded_path)
111
ie.text_sha1 = base_bzr_tree.get_file_sha1(decoded_path)
112
if ie.kind == "symlink" or ie.executable == base_exec:
113
ie.revision = base_bzr_tree.get_file_revision(decoded_path)
115
blob = lookup_object(hexsha)
117
blob = lookup_object(hexsha)
118
if ie.kind == "symlink":
120
ie.symlink_target = decode_git_path(blob.data)
122
ie.text_size = sum(map(len, blob.chunked))
123
ie.text_sha1 = osutils.sha_strings(blob.chunked)
124
# Check what revision we should store
126
for ptree in parent_bzr_trees:
127
intertree = InterTree.get(ptree, base_bzr_tree)
129
ppath = intertree.find_source_paths(decoded_path, recurse='none')
130
except errors.NoSuchFile:
134
pkind = ptree.kind(ppath)
135
if (pkind == ie.kind and
136
((pkind == "symlink" and ptree.get_symlink_target(ppath) == ie.symlink_target) or
137
(pkind == "file" and ptree.get_file_sha1(ppath) == ie.text_sha1 and
138
ptree.is_executable(ppath) == ie.executable))):
139
# found a revision in one of the parents to use
140
ie.revision = ptree.get_file_revision(ppath)
142
parent_key = (file_id, ptree.get_file_revision(ppath))
143
if parent_key not in parent_keys:
144
parent_keys.append(parent_key)
145
if ie.revision is None:
146
# Need to store a new revision
147
ie.revision = revision_id
148
if ie.revision is None:
149
raise ValueError("no file revision set")
150
if ie.kind == 'symlink':
153
chunks = blob.chunked
154
texts.insert_record_stream([
155
ChunkedContentFactory((file_id, ie.revision),
156
tuple(parent_keys), ie.text_sha1, chunks)])
158
if base_hexsha is not None:
159
old_path = decoded_path # Renames are not supported yet
160
if stat.S_ISDIR(base_mode):
161
invdelta.extend(remove_disappeared_children(
162
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
166
invdelta.append((old_path, decoded_path, file_id, ie))
167
if base_hexsha != hexsha:
168
store_updater.add_object(blob, (ie.file_id, ie.revision), path)
172
class SubmodulesRequireSubtrees(BzrError):
173
_fmt = ("The repository you are fetching from contains submodules, "
174
"which require a Bazaar format that supports tree references.")
178
def import_git_submodule(texts, mapping, path, name, hexshas,
179
base_bzr_tree, parent_id, revision_id,
180
parent_bzr_trees, lookup_object,
181
modes, store_updater, lookup_file_id):
182
"""Import a git submodule."""
183
(base_hexsha, hexsha) = hexshas
184
(base_mode, mode) = modes
185
if base_hexsha == hexsha and base_mode == mode:
187
path = decode_git_path(path)
188
file_id = lookup_file_id(path)
190
ie = TreeReference(file_id, decode_git_path(name), parent_id)
191
ie.revision = revision_id
192
if base_hexsha is not None:
193
old_path = path # Renames are not supported yet
194
if stat.S_ISDIR(base_mode):
195
invdelta.extend(remove_disappeared_children(
196
base_bzr_tree, old_path, lookup_object(base_hexsha), [],
200
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
201
texts.insert_record_stream([
202
ChunkedContentFactory((file_id, ie.revision), (), None, [])])
203
invdelta.append((old_path, path, file_id, ie))
207
def remove_disappeared_children(base_bzr_tree, path, base_tree,
208
existing_children, lookup_object):
209
"""Generate an inventory delta for removed children.
211
:param base_bzr_tree: Base bzr tree against which to generate the
213
:param path: Path to process (unicode)
214
:param base_tree: Git Tree base object
215
:param existing_children: Children that still exist
216
:param lookup_object: Lookup a git object by its SHA1
217
:return: Inventory delta, as list
219
if not isinstance(path, str):
220
raise TypeError(path)
222
for name, mode, hexsha in base_tree.iteritems():
223
if name in existing_children:
225
c_path = posixpath.join(path, decode_git_path(name))
226
file_id = base_bzr_tree.path2id(c_path)
228
raise TypeError(file_id)
229
ret.append((c_path, None, file_id, None))
230
if stat.S_ISDIR(mode):
231
ret.extend(remove_disappeared_children(
232
base_bzr_tree, c_path, lookup_object(hexsha), [],
237
def import_git_tree(texts, mapping, path, name, hexshas,
238
base_bzr_tree, parent_id, revision_id, parent_bzr_trees,
239
lookup_object, modes, store_updater,
240
lookup_file_id, allow_submodules=False):
92
241
"""Import a git tree object into a bzr repository.
94
:param repo: A Bzr repository object
95
:param path: Path in the tree
243
:param texts: VersionedFiles object to add to
244
:param path: Path in the tree (str)
245
:param name: Name of the tree (str)
96
246
:param tree: A git tree object
97
:param inv: Inventory object
247
:param base_bzr_tree: Base inventory against which to return inventory
249
:return: Inventory delta for this subtree
99
file_id = mapping.generate_file_id(path)
100
text_revision = inv.revision_id
101
repo.texts.add_lines((file_id, text_revision),
102
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
104
ie = inv.add_path(path, "directory", file_id)
105
ie.revision = text_revision
106
for mode, name, hexsha in tree.entries():
107
entry_kind = (mode & 0700000) / 0100000
108
basename = name.decode("utf-8")
112
child_path = urlutils.join(path, name)
114
tree = lookup_object(hexsha)
115
import_git_tree(repo, mapping, child_path, tree, inv, parent_invs, lookup_object)
116
elif entry_kind == 1:
117
blob = lookup_object(hexsha)
118
fs_mode = mode & 0777
119
import_git_blob(repo, mapping, child_path, blob, inv, parent_invs, bool(fs_mode & 0111))
121
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
124
def import_git_objects(repo, mapping, object_iter, pb=None):
251
(base_hexsha, hexsha) = hexshas
252
(base_mode, mode) = modes
253
if not isinstance(path, bytes):
254
raise TypeError(path)
255
if not isinstance(name, bytes):
256
raise TypeError(name)
257
if base_hexsha == hexsha and base_mode == mode:
258
# If nothing has changed since the base revision, we're done
261
file_id = lookup_file_id(osutils.safe_unicode(path))
262
ie = InventoryDirectory(file_id, decode_git_path(name), parent_id)
263
tree = lookup_object(hexsha)
264
if base_hexsha is None:
266
old_path = None # Newly appeared here
268
base_tree = lookup_object(base_hexsha)
269
old_path = decode_git_path(path) # Renames aren't supported yet
270
new_path = decode_git_path(path)
271
if base_tree is None or type(base_tree) is not Tree:
272
ie.revision = revision_id
273
invdelta.append((old_path, new_path, ie.file_id, ie))
274
texts.insert_record_stream([
275
ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
276
# Remember for next time
277
existing_children = set()
279
for name, child_mode, child_hexsha in tree.iteritems():
280
existing_children.add(name)
281
child_path = posixpath.join(path, name)
282
if type(base_tree) is Tree:
284
child_base_mode, child_base_hexsha = base_tree[name]
286
child_base_hexsha = None
289
child_base_hexsha = None
291
if stat.S_ISDIR(child_mode):
292
subinvdelta, grandchildmodes = import_git_tree(
293
texts, mapping, child_path, name,
294
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
295
revision_id, parent_bzr_trees, lookup_object,
296
(child_base_mode, child_mode), store_updater, lookup_file_id,
297
allow_submodules=allow_submodules)
298
elif S_ISGITLINK(child_mode): # submodule
299
if not allow_submodules:
300
raise SubmodulesRequireSubtrees()
301
subinvdelta, grandchildmodes = import_git_submodule(
302
texts, mapping, child_path, name,
303
(child_base_hexsha, child_hexsha),
304
base_bzr_tree, file_id, revision_id, parent_bzr_trees,
305
lookup_object, (child_base_mode, child_mode), store_updater,
308
if not mapping.is_special_file(name):
309
subinvdelta = import_git_blob(
310
texts, mapping, child_path, name,
311
(child_base_hexsha, child_hexsha), base_bzr_tree, file_id,
312
revision_id, parent_bzr_trees, lookup_object,
313
(child_base_mode, child_mode), store_updater,
318
child_modes.update(grandchildmodes)
319
invdelta.extend(subinvdelta)
320
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
321
stat.S_IFLNK, DEFAULT_FILE_MODE | 0o111,
323
child_modes[child_path] = child_mode
324
# Remove any children that have disappeared
325
if base_tree is not None and type(base_tree) is Tree:
326
invdelta.extend(remove_disappeared_children(
327
base_bzr_tree, old_path, base_tree, existing_children,
329
store_updater.add_object(tree, (file_id, revision_id), path)
330
return invdelta, child_modes
333
def verify_commit_reconstruction(target_git_object_retriever, lookup_object,
334
o, rev, ret_tree, parent_trees, mapping,
335
unusual_modes, verifiers):
336
new_unusual_modes = mapping.export_unusual_file_modes(rev)
337
if new_unusual_modes != unusual_modes:
338
raise AssertionError("unusual modes don't match: %r != %r" % (
339
unusual_modes, new_unusual_modes))
340
# Verify that we can reconstruct the commit properly
341
rec_o = target_git_object_retriever._reconstruct_commit(rev, o.tree, True,
344
raise AssertionError("Reconstructed commit differs: %r != %r" % (
348
for path, obj, ie in _tree_to_objects(
349
ret_tree, parent_trees, target_git_object_retriever._cache.idmap,
350
unusual_modes, mapping.BZR_DUMMY_FILE):
351
old_obj_id = tree_lookup_path(lookup_object, o.tree, path)[1]
353
if obj.id != old_obj_id:
354
diff.append((path, lookup_object(old_obj_id), obj))
355
for (path, old_obj, new_obj) in diff:
356
while (old_obj.type_name == "tree"
357
and new_obj.type_name == "tree"
358
and sorted(old_obj) == sorted(new_obj)):
360
if old_obj[name][0] != new_obj[name][0]:
361
raise AssertionError(
362
"Modes for %s differ: %o != %o" %
363
(path, old_obj[name][0], new_obj[name][0]))
364
if old_obj[name][1] != new_obj[name][1]:
365
# Found a differing child, delve deeper
366
path = posixpath.join(path, name)
367
old_obj = lookup_object(old_obj[name][1])
368
new_obj = new_objs[path]
370
raise AssertionError(
371
"objects differ for %s: %r != %r" % (path, old_obj, new_obj))
374
def ensure_inventories_in_repo(repo, trees):
375
real_inv_vf = repo.inventories.without_fallbacks()
377
revid = t.get_revision_id()
378
if not real_inv_vf.get_parent_map([(revid, )]):
379
repo.add_inventory(revid, t.root_inventory, t.get_parent_ids())
382
def import_git_commit(repo, mapping, head, lookup_object,
383
target_git_object_retriever, trees_cache, strict):
384
o = lookup_object(head)
385
# Note that this uses mapping.revision_id_foreign_to_bzr. If the parents
386
# were bzr roundtripped revisions they would be specified in the
388
rev, roundtrip_revid, verifiers = mapping.import_commit(
389
o, mapping.revision_id_foreign_to_bzr, strict)
390
if roundtrip_revid is not None:
391
original_revid = rev.revision_id
392
rev.revision_id = roundtrip_revid
393
# We have to do this here, since we have to walk the tree and
394
# we need to make sure to import the blobs / trees with the right
395
# path; this may involve adding them more than once.
396
parent_trees = trees_cache.revision_trees(rev.parent_ids)
397
ensure_inventories_in_repo(repo, parent_trees)
398
if parent_trees == []:
399
base_bzr_tree = trees_cache.revision_tree(NULL_REVISION)
403
base_bzr_tree = parent_trees[0]
404
base_tree = lookup_object(o.parents[0]).tree
405
base_mode = stat.S_IFDIR
406
store_updater = target_git_object_retriever._get_updater(rev)
407
inv_delta, unusual_modes = import_git_tree(
408
repo.texts, mapping, b"", b"", (base_tree, o.tree), base_bzr_tree,
409
None, rev.revision_id, parent_trees, lookup_object,
410
(base_mode, stat.S_IFDIR), store_updater,
411
mapping.generate_file_id,
412
allow_submodules=repo._format.supports_tree_reference)
413
if unusual_modes != {}:
414
for path, mode in unusual_modes.iteritems():
415
warn_unusual_mode(rev.foreign_revid, path, mode)
416
mapping.import_unusual_file_modes(rev, unusual_modes)
418
basis_id = rev.parent_ids[0]
420
basis_id = NULL_REVISION
421
base_bzr_inventory = None
423
base_bzr_inventory = base_bzr_tree.root_inventory
424
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
425
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
427
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
429
if verifiers and roundtrip_revid is not None:
430
testament = StrictTestament3(rev, ret_tree)
431
calculated_verifiers = {"testament3-sha1": testament.as_sha1()}
432
if calculated_verifiers != verifiers:
433
trace.mutter("Testament SHA1 %r for %r did not match %r.",
434
calculated_verifiers["testament3-sha1"],
435
rev.revision_id, verifiers["testament3-sha1"])
436
rev.revision_id = original_revid
437
rev.inventory_sha1, inv = repo.add_inventory_by_delta(
438
basis_id, inv_delta, rev.revision_id, rev.parent_ids,
440
ret_tree = InventoryRevisionTree(repo, inv, rev.revision_id)
442
calculated_verifiers = {}
443
store_updater.add_object(o, calculated_verifiers, None)
444
store_updater.finish()
445
trees_cache.add(ret_tree)
446
repo.add_revision(rev.revision_id, rev)
447
if "verify" in debug.debug_flags:
448
verify_commit_reconstruction(
449
target_git_object_retriever, lookup_object, o, rev, ret_tree,
450
parent_trees, mapping, unusual_modes, verifiers)
453
def import_git_objects(repo, mapping, object_iter,
454
target_git_object_retriever, heads, pb=None,
125
456
"""Import a set of git objects into a bzr repository.
127
:param repo: Bazaar repository
458
:param repo: Target Bazaar repository
128
459
:param mapping: Mapping to use
129
460
:param object_iter: Iterator over Git objects.
461
:return: Tuple with pack hints and last imported revision id
131
# TODO: a more (memory-)efficient implementation of this
133
for i, o in enumerate(object_iter):
135
pb.update("fetching objects", i)
463
def lookup_object(sha):
465
return object_iter[sha]
467
return target_git_object_retriever[sha]
470
heads = list(set(heads))
471
trees_cache = LRUTreeCache(repo)
140
472
# Find and convert commit objects
141
for o in objects.itervalues():
475
pb.update("finding revisions to fetch", len(graph), None)
479
if not isinstance(head, bytes):
480
raise TypeError(head)
482
o = lookup_object(head)
142
485
if isinstance(o, Commit):
143
rev = mapping.import_commit(o)
144
root_trees[rev.revision_id] = objects[o.tree]
145
revisions[rev.revision_id] = rev
146
graph.append((rev.revision_id, rev.parent_ids))
486
rev, roundtrip_revid, verifiers = mapping.import_commit(
487
o, mapping.revision_id_foreign_to_bzr, strict=True)
488
if (repo.has_revision(rev.revision_id)
489
or (roundtrip_revid and
490
repo.has_revision(roundtrip_revid))):
492
graph.append((o.id, o.parents))
493
heads.extend([p for p in o.parents if p not in checked])
494
elif isinstance(o, Tag):
495
if o.object[1] not in checked:
496
heads.append(o.object[1])
498
trace.warning("Unable to import head object %r" % o)
147
501
# Order the revisions
148
502
# Create the inventory objects
149
for i, revid in enumerate(topo_sort(graph)):
151
pb.update("fetching revisions", i, len(graph))
152
root_tree = root_trees[revid]
153
rev = revisions[revid]
154
# We have to do this here, since we have to walk the tree and
155
# we need to make sure to import the blobs / trees with the right
156
# path; this may involve adding them more than once.
158
inv.revision_id = rev.revision_id
159
def lookup_object(sha):
162
return reconstruct_git_object(repo, mapping, sha)
163
parent_invs = [repo.get_inventory(r) for r in rev.parent_ids]
164
import_git_tree(repo, mapping, "", root_tree, inv, parent_invs, lookup_object)
165
repo.add_revision(rev.revision_id, rev, inv)
168
def reconstruct_git_commit(repo, rev):
169
raise NotImplementedError(self.reconstruct_git_commit)
172
def reconstruct_git_object(repo, mapping, sha):
174
revid = mapping.revision_id_foreign_to_bzr(sha)
176
rev = repo.get_revision(revid)
177
except NoSuchRevision:
180
return reconstruct_git_commit(rev)
184
raise KeyError("No such object %s" % sha)
187
class InterGitRepository(InterRepository):
189
_matching_repo_format = GitFormat()
192
def _get_repo_format_to_test():
195
def copy_content(self, revision_id=None, pb=None):
196
"""See InterRepository.copy_content."""
197
self.fetch(revision_id, pb, find_ghosts=False)
199
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
202
mapping = self.source.get_mapping()
204
pb.note("git: %s", text)
205
def determine_wants(heads):
206
if revision_id is None:
209
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
210
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
211
graph_walker = BzrFetchGraphWalker(self.target, mapping)
214
create_pb = pb = ui.ui_factory.nested_progress_bar()
504
revision_ids = topo_sort(graph)
506
if limit is not None:
507
revision_ids = revision_ids[:limit]
509
for offset in range(0, len(revision_ids), batch_size):
510
target_git_object_retriever.start_write_group()
216
self.target.lock_write()
512
repo.start_write_group()
218
self.target.start_write_group()
220
import_git_objects(self.target, mapping,
221
iter(self.source.fetch_objects(determine_wants, graph_walker,
224
self.target.commit_write_group()
232
def is_compatible(source, target):
233
"""Be compatible with GitRepository."""
234
# FIXME: Also check target uses VersionedFile
235
return (isinstance(source, GitRepository) and
236
target.supports_rich_root())
514
for i, head in enumerate(
515
revision_ids[offset:offset + batch_size]):
517
pb.update("fetching revisions", offset + i,
519
import_git_commit(repo, mapping, head, lookup_object,
520
target_git_object_retriever, trees_cache,
523
except BaseException:
524
repo.abort_write_group()
527
hint = repo.commit_write_group()
529
pack_hints.extend(hint)
530
except BaseException:
531
target_git_object_retriever.abort_write_group()
534
target_git_object_retriever.commit_write_group()
535
return pack_hints, last_imported
538
class DetermineWantsRecorder(object):
540
def __init__(self, actual):
543
self.remote_refs = {}
545
def __call__(self, refs):
546
if type(refs) is not dict:
547
raise TypeError(refs)
548
self.remote_refs = refs
549
self.wants = self.actual(refs)