14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils, ui, urlutils
18
from bzrlib.errors import InvalidRevisionId, NoSuchRevision
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
22
from bzrlib.tsort import topo_sort
17
from dulwich.objects import (
23
from dulwich.object_store import (
26
from itertools import (
39
from bzrlib.errors import (
43
from bzrlib.inventory import (
50
from bzrlib.repository import (
53
from bzrlib.revision import (
56
from bzrlib.revisiontree import (
59
from bzrlib.tsort import (
62
from bzrlib.versionedfile import (
63
ChunkedContentFactory,
66
from bzrlib.plugins.git.mapping import (
73
from bzrlib.plugins.git.object_store import (
77
from bzrlib.plugins.git.remote import (
24
80
from bzrlib.plugins.git.repository import (
29
from bzrlib.plugins.git.converter import GitObjectConverter
30
from bzrlib.plugins.git.remote import RemoteGitRepository
33
from dulwich.client import SimpleFetchGraphWalker
34
from dulwich.objects import Commit
36
from cStringIO import StringIO
39
class BzrFetchGraphWalker(object):
40
"""GraphWalker implementation that uses a Bazaar repository."""
42
def __init__(self, repository, mapping):
43
self.repository = repository
44
self.mapping = mapping
46
self.heads = set(repository.all_revision_ids())
50
return iter(self.next, None)
53
revid = self.mapping.revision_id_foreign_to_bzr(sha)
56
def remove(self, revid):
58
if revid in self.heads:
59
self.heads.remove(revid)
60
if revid in self.parents:
61
for p in self.parents[revid]:
66
ret = self.heads.pop()
67
ps = self.repository.get_parent_map([ret])[ret]
68
self.parents[ret] = ps
69
self.heads.update([p for p in ps if not p in self.done])
72
return self.mapping.revision_id_bzr_to_foreign(ret)[0]
73
except InvalidRevisionId:
78
def import_git_blob(repo, mapping, path, blob, inv, parent_invs, gitmap, executable):
87
def import_git_blob(texts, mapping, path, name, (base_hexsha, hexsha),
88
base_inv, parent_id, revision_id,
89
parent_invs, lookup_object, (base_mode, mode), store_updater):
79
90
"""Import a git blob object into a bzr repository.
81
:param repo: bzr repository
92
:param texts: VersionedFiles to add to
82
93
:param path: Path in the tree
83
94
:param blob: A git blob
95
:return: Inventory delta for this file
85
file_id = mapping.generate_file_id(path)
86
text_revision = inv.revision_id
87
repo.texts.add_lines((file_id, text_revision),
88
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
89
osutils.split_lines(blob.data))
90
ie = inv.add_path(path, "file", file_id)
91
ie.revision = text_revision
92
ie.text_size = len(blob.data)
93
ie.text_sha1 = osutils.sha_string(blob.data)
94
ie.executable = executable
95
gitmap._idmap.add_entry(blob.sha().hexdigest(), "blob", (ie.file_id, ie.revision))
98
def import_git_tree(repo, mapping, path, tree, inv, parent_invs,
99
gitmap, lookup_object):
97
if base_hexsha == hexsha and base_mode == mode:
98
# If nothing has changed since the base revision, we're done
100
file_id = mapping.generate_file_id(path)
101
if stat.S_ISLNK(mode):
105
ie = cls(file_id, name.decode("utf-8"), parent_id)
106
ie.executable = mode_is_executable(mode)
107
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
108
base_ie = base_inv[base_inv.path2id(path)]
109
ie.text_size = base_ie.text_size
110
ie.text_sha1 = base_ie.text_sha1
111
ie.symlink_target = base_ie.symlink_target
112
if ie.executable == base_ie.executable:
113
ie.revision = base_ie.revision
115
blob = lookup_object(hexsha)
117
blob = lookup_object(hexsha)
118
if ie.kind == "symlink":
120
ie.symlink_target = blob.data
124
ie.text_size = sum(imap(len, blob.chunked))
125
ie.text_sha1 = osutils.sha_strings(blob.chunked)
126
# Check what revision we should store
128
for pinv in parent_invs[1:]:
133
if pie.text_sha1 == ie.text_sha1 and pie.executable == ie.executable and pie.symlink_target == ie.symlink_target:
134
# found a revision in one of the parents to use
135
ie.revision = pie.revision
137
parent_keys.append((file_id, pie.revision))
138
if ie.revision is None:
139
# Need to store a new revision
140
ie.revision = revision_id
141
assert ie.revision is not None
142
if ie.kind == 'symlink':
145
chunks = blob.chunked
146
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, chunks)])
148
if base_hexsha is not None:
149
old_path = path # Renames are not supported yet
150
if stat.S_ISDIR(base_mode):
151
invdelta.extend(remove_disappeared_children(base_inv, old_path,
152
lookup_object(base_hexsha), [], lookup_object))
155
invdelta.append((old_path, path, file_id, ie))
156
if base_hexsha != hexsha:
157
store_updater.add_object(blob, ie)
161
class SubmodulesRequireSubtrees(BzrError):
162
_fmt = """The repository you are fetching from contains submodules. To continue, upgrade your Bazaar repository to a format that supports nested trees, such as 'development-subtree'."""
166
def import_git_submodule(texts, mapping, path, name, (base_hexsha, hexsha),
167
base_inv, parent_id, revision_id, parent_invs, lookup_object,
168
(base_mode, mode), store_updater):
169
if base_hexsha == hexsha and base_mode == mode:
171
file_id = mapping.generate_file_id(path)
172
ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
173
ie.revision = revision_id
174
if base_hexsha is None:
178
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
179
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), (), None, [])])
180
invdelta = [(oldpath, path, file_id, ie)]
184
def remove_disappeared_children(base_inv, path, base_tree, existing_children,
187
for name, mode, hexsha in base_tree.iteritems():
188
if name in existing_children:
190
c_path = posixpath.join(path, name.decode("utf-8"))
191
ret.append((c_path, None, base_inv.path2id(c_path), None))
192
if stat.S_ISDIR(mode):
193
ret.extend(remove_disappeared_children(
194
base_inv, c_path, lookup_object(hexsha), [], lookup_object))
198
def import_git_tree(texts, mapping, path, name, (base_hexsha, hexsha),
199
base_inv, parent_id, revision_id, parent_invs,
200
lookup_object, (base_mode, mode), store_updater, allow_submodules=False):
100
201
"""Import a git tree object into a bzr repository.
102
:param repo: A Bzr repository object
203
:param texts: VersionedFiles object to add to
103
204
:param path: Path in the tree
104
205
:param tree: A git tree object
105
:param inv: Inventory object
206
:param base_inv: Base inventory against which to return inventory delta
207
:return: Inventory delta for this subtree
209
if base_hexsha == hexsha and base_mode == mode:
210
# If nothing has changed since the base revision, we're done
107
213
file_id = mapping.generate_file_id(path)
108
text_revision = inv.revision_id
109
repo.texts.add_lines((file_id, text_revision),
110
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
112
ie = inv.add_path(path, "directory", file_id)
113
ie.revision = text_revision
114
gitmap._idmap.add_entry(tree.sha().hexdigest(), "tree", (file_id, text_revision))
115
for mode, name, hexsha in tree.entries():
116
entry_kind = (mode & 0700000) / 0100000
117
basename = name.decode("utf-8")
121
child_path = urlutils.join(path, name)
123
tree = lookup_object(hexsha)
124
import_git_tree(repo, mapping, child_path, tree, inv, parent_invs, gitmap, lookup_object)
125
elif entry_kind == 1:
126
blob = lookup_object(hexsha)
127
fs_mode = mode & 0777
128
import_git_blob(repo, mapping, child_path, blob, inv, parent_invs, gitmap, bool(fs_mode & 0111))
130
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
133
def import_git_objects(repo, mapping, object_iter, target_git_object_retriever,
214
# We just have to hope this is indeed utf-8:
215
ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
216
tree = lookup_object(hexsha)
217
if base_hexsha is None:
219
old_path = None # Newly appeared here
221
base_tree = lookup_object(base_hexsha)
222
old_path = path # Renames aren't supported yet
223
if base_tree is None or type(base_tree) is not Tree:
224
ie.revision = revision_id
225
invdelta.append((old_path, path, ie.file_id, ie))
226
texts.insert_record_stream([ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
227
# Remember for next time
228
existing_children = set()
230
for child_mode, name, child_hexsha in tree.entries():
231
existing_children.add(name)
232
child_path = posixpath.join(path, name)
233
if type(base_tree) is Tree:
235
child_base_mode, child_base_hexsha = base_tree[name]
237
child_base_hexsha = None
240
child_base_hexsha = None
242
if stat.S_ISDIR(child_mode):
243
subinvdelta, grandchildmodes = import_git_tree(
244
texts, mapping, child_path, name,
245
(child_base_hexsha, child_hexsha),
246
base_inv, file_id, revision_id, parent_invs, lookup_object,
247
(child_base_mode, child_mode), store_updater,
248
allow_submodules=allow_submodules)
249
elif S_ISGITLINK(child_mode): # submodule
250
if not allow_submodules:
251
raise SubmodulesRequireSubtrees()
252
subinvdelta, grandchildmodes = import_git_submodule(
253
texts, mapping, child_path, name,
254
(child_base_hexsha, child_hexsha),
255
base_inv, file_id, revision_id, parent_invs, lookup_object,
256
(child_base_mode, child_mode), store_updater)
258
subinvdelta = import_git_blob(texts, mapping,
259
child_path, name, (child_base_hexsha, child_hexsha),
260
base_inv, file_id, revision_id, parent_invs, lookup_object,
261
(child_base_mode, child_mode), store_updater)
263
child_modes.update(grandchildmodes)
264
invdelta.extend(subinvdelta)
265
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
266
stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
267
child_modes[child_path] = child_mode
268
# Remove any children that have disappeared
269
if base_tree is not None and type(base_tree) is Tree:
270
invdelta.extend(remove_disappeared_children(base_inv, old_path,
271
base_tree, existing_children, lookup_object))
272
store_updater.add_object(tree, ie)
273
return invdelta, child_modes
276
def import_git_commit(repo, mapping, head, lookup_object,
277
target_git_object_retriever, trees_cache):
278
o = lookup_object(head)
279
rev = mapping.import_commit(o)
280
# We have to do this here, since we have to walk the tree and
281
# we need to make sure to import the blobs / trees with the right
282
# path; this may involve adding them more than once.
283
parent_trees = trees_cache.revision_trees(rev.parent_ids)
284
if parent_trees == []:
285
base_inv = Inventory(root_id=None)
289
base_inv = parent_trees[0].inventory
290
base_tree = lookup_object(o.parents[0]).tree
291
base_mode = stat.S_IFDIR
292
store_updater = target_git_object_retriever._get_updater(rev)
293
store_updater.add_object(o, None)
294
inv_delta, unusual_modes = import_git_tree(repo.texts,
295
mapping, "", u"", (base_tree, o.tree), base_inv,
296
None, rev.revision_id, [p.inventory for p in parent_trees],
297
lookup_object, (base_mode, stat.S_IFDIR), store_updater,
298
allow_submodules=getattr(repo._format, "supports_tree_reference", False))
299
store_updater.finish()
300
if unusual_modes != {}:
301
for path, mode in unusual_modes.iteritems():
302
warn_unusual_mode(rev.foreign_revid, path, mode)
303
mapping.import_unusual_file_modes(rev, unusual_modes)
305
basis_id = rev.parent_ids[0]
307
basis_id = NULL_REVISION
309
rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
310
inv_delta, rev.revision_id, rev.parent_ids,
312
trees_cache.add(RevisionTree(repo, inv, rev.revision_id))
313
repo.add_revision(rev.revision_id, rev)
314
if "verify" in debug.debug_flags:
315
new_unusual_modes = mapping.export_unusual_file_modes(rev)
316
if new_unusual_modes != unusual_modes:
317
raise AssertionError("unusual modes don't match: %r != %r" % (
318
unusual_modes, new_unusual_modes))
321
def import_git_objects(repo, mapping, object_iter,
322
target_git_object_retriever, heads, pb=None, limit=None):
135
323
"""Import a set of git objects into a bzr repository.
137
:param repo: Bazaar repository
325
:param repo: Target Bazaar repository
138
326
:param mapping: Mapping to use
139
327
:param object_iter: Iterator over Git objects.
328
:return: Tuple with pack hints and last imported revision id
141
# TODO: a more (memory-)efficient implementation of this
330
def lookup_object(sha):
332
return object_iter[sha]
334
return target_git_object_retriever[sha]
337
heads = list(set(heads))
338
trees_cache = LRUTreeCache(repo)
145
339
# Find and convert commit objects
146
for o in object_iter.iterobjects():
342
pb.update("finding revisions to fetch", len(graph), None)
344
assert isinstance(head, str)
346
o = lookup_object(head)
147
349
if isinstance(o, Commit):
148
350
rev = mapping.import_commit(o)
149
root_trees[rev.revision_id] = object_iter[o.tree]
150
revisions[rev.revision_id] = rev
151
graph.append((rev.revision_id, rev.parent_ids))
152
target_git_object_retriever._idmap.add_entry(o.sha().hexdigest(), "commit", (rev.revision_id, o._tree))
351
if repo.has_revision(rev.revision_id):
353
squash_revision(repo, rev)
354
graph.append((o.id, o.parents))
355
heads.extend([p for p in o.parents if p not in checked])
356
elif isinstance(o, Tag):
357
if o.object[1] not in checked:
358
heads.append(o.object[1])
360
trace.warning("Unable to import head object %r" % o)
153
363
# Order the revisions
154
364
# Create the inventory objects
155
for i, revid in enumerate(topo_sort(graph)):
157
pb.update("fetching revisions", i, len(graph))
158
root_tree = root_trees[revid]
159
rev = revisions[revid]
160
# We have to do this here, since we have to walk the tree and
161
# we need to make sure to import the blobs / trees with the riht
162
# path; this may involve adding them more than once.
164
inv.revision_id = rev.revision_id
165
def lookup_object(sha):
166
if sha in object_iter:
167
return object_iter[sha]
168
return target_git_object_retriever[sha]
169
parent_invs = [repo.get_inventory(r) for r in rev.parent_ids]
170
import_git_tree(repo, mapping, "", root_tree, inv, parent_invs,
171
target_git_object_retriever, lookup_object)
172
repo.add_revision(rev.revision_id, rev, inv)
175
class InterGitNonGitRepository(InterRepository):
177
_matching_repo_format = GitFormat()
366
revision_ids = topo_sort(graph)
368
if limit is not None:
369
revision_ids = revision_ids[:limit]
371
for offset in range(0, len(revision_ids), batch_size):
372
target_git_object_retriever.start_write_group()
374
repo.start_write_group()
376
for i, head in enumerate(
377
revision_ids[offset:offset+batch_size]):
379
pb.update("fetching revisions", offset+i,
381
import_git_commit(repo, mapping, head, lookup_object,
382
target_git_object_retriever,
386
repo.abort_write_group()
389
hint = repo.commit_write_group()
391
pack_hints.extend(hint)
393
target_git_object_retriever.abort_write_group()
396
target_git_object_retriever.commit_write_group()
397
return pack_hints, last_imported
400
class InterGitRepository(InterRepository):
402
_matching_repo_format = GitRepositoryFormat()
180
405
def _get_repo_format_to_test():
184
409
"""See InterRepository.copy_content."""
185
410
self.fetch(revision_id, pb, find_ghosts=False)
187
def fetch_objects(self, determine_wants, mapping, pb=None):
412
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
413
mapping=None, fetch_spec=None):
414
self.fetch_refs(revision_id=revision_id, pb=pb,
415
find_ghosts=find_ghosts, mapping=mapping, fetch_spec=fetch_spec)
418
class InterGitNonGitRepository(InterGitRepository):
419
"""Base InterRepository that copies revisions from a Git into a non-Git
422
def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
423
mapping=None, fetch_spec=None):
425
mapping = self.source.get_mapping()
426
if revision_id is not None:
427
interesting_heads = [revision_id]
428
elif fetch_spec is not None:
429
interesting_heads = fetch_spec.heads
431
interesting_heads = None
433
def determine_wants(refs):
435
if interesting_heads is None:
436
ret = [sha for (ref, sha) in refs.iteritems() if not ref.endswith("^{}")]
438
ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
439
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
440
(pack_hint, _) = self.fetch_objects(determine_wants, mapping, pb)
441
if pack_hint is not None and self.target._format.pack_compresses:
442
self.target.pack(hint=pack_hint)
443
if interesting_heads is not None:
444
present_interesting_heads = self.target.has_revisions(interesting_heads)
445
missing_interesting_heads = set(interesting_heads) - present_interesting_heads
446
if missing_interesting_heads:
447
raise AssertionError("Missing interesting heads: %r" % missing_interesting_heads)
451
_GIT_PROGRESS_RE = re.compile(r"(.*?): +(\d+)% \((\d+)/(\d+)\)")
452
def report_git_progress(pb, text):
453
text = text.rstrip("\r\n")
454
g = _GIT_PROGRESS_RE.match(text)
456
(text, pct, current, total) = g.groups()
457
pb.update(text, int(current), int(total))
459
pb.update(text, 0, 0)
462
class InterRemoteGitNonGitRepository(InterGitNonGitRepository):
463
"""InterRepository that copies revisions from a remote Git into a non-Git
466
def get_target_heads(self):
467
# FIXME: This should be more efficient
468
all_revs = self.target.all_revision_ids()
469
parent_map = self.target.get_parent_map(all_revs)
471
map(all_parents.update, parent_map.itervalues())
472
return set(all_revs) - all_parents
474
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
188
475
def progress(text):
189
pb.update("git: %s" % text.rstrip("\r\n"), 0, 0)
190
graph_walker = BzrFetchGraphWalker(self.target, mapping)
476
report_git_progress(pb, text)
477
store = BazaarObjectStore(self.target, mapping)
478
self.target.lock_write()
480
heads = self.get_target_heads()
481
graph_walker = store.get_graph_walker(
482
[store._lookup_revision_sha1(head) for head in heads])
485
def record_determine_wants(heads):
486
wants = determine_wants(heads)
487
recorded_wants.extend(wants)
492
create_pb = pb = ui.ui_factory.nested_progress_bar()
494
objects_iter = self.source.fetch_objects(
495
record_determine_wants, graph_walker,
496
store.get_raw, progress)
497
return import_git_objects(self.target, mapping,
498
objects_iter, store, recorded_wants, pb, limit)
506
def is_compatible(source, target):
507
"""Be compatible with GitRepository."""
508
# FIXME: Also check target uses VersionedFile
509
return (isinstance(source, RemoteGitRepository) and
510
target.supports_rich_root() and
511
not isinstance(target, GitRepository))
514
class InterLocalGitNonGitRepository(InterGitNonGitRepository):
515
"""InterRepository that copies revisions from a local Git into a non-Git
518
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
521
wants = determine_wants(self.source._git.get_refs())
193
524
create_pb = pb = ui.ui_factory.nested_progress_bar()
194
target_git_object_retriever = GitObjectConverter(self.target, mapping)
525
target_git_object_retriever = BazaarObjectStore(self.target, mapping)
197
527
self.target.lock_write()
199
self.target.start_write_group()
201
objects_iter = self.source.fetch_objects(determine_wants,
203
target_git_object_retriever.__getitem__,
205
import_git_objects(self.target, mapping, objects_iter,
206
target_git_object_retriever, pb)
208
self.target.commit_write_group()
529
return import_git_objects(self.target, mapping,
530
self.source._git.object_store,
531
target_git_object_retriever, wants, pb, limit)
210
533
self.target.unlock()
213
536
create_pb.finished()
215
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
218
mapping = self.source.get_mapping()
219
def determine_wants(heads):
220
if revision_id is None:
223
ret = [mapping.revision_id_bzr_to_foreign(revision_id)[0]]
224
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
225
return self.fetch_objects(determine_wants, mapping, pb)
228
539
def is_compatible(source, target):
229
540
"""Be compatible with GitRepository."""
230
541
# FIXME: Also check target uses VersionedFile
231
return (isinstance(source, GitRepository) and
542
return (isinstance(source, LocalGitRepository) and
232
543
target.supports_rich_root() and
233
544
not isinstance(target, GitRepository))
236
class InterGitRepository(InterRepository):
238
_matching_repo_format = GitFormat()
241
def _get_repo_format_to_test():
244
def copy_content(self, revision_id=None, pb=None):
245
"""See InterRepository.copy_content."""
246
self.fetch(revision_id, pb, find_ghosts=False)
248
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
251
mapping = self.source.get_mapping()
547
class InterGitGitRepository(InterGitRepository):
548
"""InterRepository that copies between Git repositories."""
550
def fetch_objects(self, determine_wants, mapping, pb=None):
252
551
def progress(text):
253
info("git: %s", text)
552
trace.note("git: %s", text)
553
graphwalker = self.target._git.get_graph_walker()
554
if (isinstance(self.source, LocalGitRepository) and
555
isinstance(self.target, LocalGitRepository)):
556
return self.source._git.fetch(self.target._git, determine_wants,
558
elif (isinstance(self.source, LocalGitRepository) and
559
isinstance(self.target, RemoteGitRepository)):
560
raise NotImplementedError
561
elif (isinstance(self.source, RemoteGitRepository) and
562
isinstance(self.target, LocalGitRepository)):
563
f, commit = self.target._git.object_store.add_thin_pack()
565
refs = self.source._git.fetch_pack(determine_wants,
566
graphwalker, f.write, progress)
575
def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
576
mapping=None, fetch_spec=None, branches=None):
578
mapping = self.source.get_mapping()
254
579
r = self.target._git
255
if revision_id is None:
256
determine_wants = lambda x: [y for y in x.values() if not y in r.object_store]
580
if revision_id is not None:
258
581
args = [mapping.revision_id_bzr_to_foreign(revision_id)[0]]
582
elif fetch_spec is not None:
583
args = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in fetch_spec.heads]
584
if branches is not None:
585
determine_wants = lambda x: [x[y] for y in branches if not x[y] in r.object_store]
586
elif fetch_spec is None and revision_id is None:
587
determine_wants = r.object_store.determine_wants_all
259
589
determine_wants = lambda x: [y for y in args if not y in r.object_store]
590
return self.fetch_objects(determine_wants, mapping)[0]
261
graphwalker = SimpleFetchGraphWalker(r.heads().values(), r.get_parents)
262
f, commit = r.object_store.add_pack()
264
self.source._git.fetch_pack(path, determine_wants, graphwalker, f.write, progress)
272
594
def is_compatible(source, target):
273
595
"""Be compatible with GitRepository."""
274
return (isinstance(source, GitRepository) and
596
return (isinstance(source, GitRepository) and
275
597
isinstance(target, GitRepository))