14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import osutils, ui, urlutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
22
from bzrlib.tsort import topo_sort
24
from bzrlib.plugins.git import git
17
from dulwich.objects import (
22
from dulwich.object_store import (
35
from bzrlib.errors import (
39
from bzrlib.inventory import (
46
from bzrlib.repository import (
49
from bzrlib.revision import (
52
from bzrlib.tsort import (
55
from bzrlib.versionedfile import (
56
FulltextContentFactory,
59
from bzrlib.plugins.git.mapping import (
61
inventory_to_tree_and_blobs,
66
from bzrlib.plugins.git.object_store import (
70
from bzrlib.plugins.git.remote import (
25
73
from bzrlib.plugins.git.repository import (
30
from bzrlib.plugins.git.remote import RemoteGitRepository
32
from dulwich.objects import Commit
34
from cStringIO import StringIO
37
class BzrFetchGraphWalker(object):
39
def __init__(self, repository, mapping):
40
self.repository = repository
41
self.mapping = mapping
43
self.heads = set(repository.all_revision_ids())
47
revid = self.mapping.revision_id_foreign_to_bzr(sha)
50
def remove(self, revid):
53
self.heads.remove(revid)
54
if revid in self.parents:
55
for p in self.parents[revid]:
60
ret = self.heads.pop()
61
ps = self.repository.get_parent_map([ret])[ret]
62
self.parents[ret] = ps
63
self.heads.update([p for p in ps if not p in self.done])
66
return self.mapping.revision_id_bzr_to_foreign(ret)
67
except InvalidRevisionId:
72
def import_git_blob(repo, mapping, path, blob, inv, parent_invs, executable):
80
def import_git_blob(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
81
base_ie, parent_id, revision_id, parent_invs, lookup_object,
73
83
"""Import a git blob object into a bzr repository.
75
:param repo: bzr repository
85
:param texts: VersionedFiles to add to
76
86
:param path: Path in the tree
77
87
:param blob: A git blob
88
:return: Inventory delta for this file
79
90
file_id = mapping.generate_file_id(path)
80
text_revision = inv.revision_id
81
repo.texts.add_lines((file_id, text_revision),
82
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
83
osutils.split_lines(blob.data))
84
ie = inv.add_path(path, "file", file_id)
85
ie.revision = text_revision
86
ie.text_size = len(blob.data)
87
ie.text_sha1 = osutils.sha_string(blob.data)
95
# We just have to hope this is indeed utf-8:
96
ie = cls(file_id, urlutils.basename(path).decode("utf-8"), parent_id)
88
97
ie.executable = executable
91
def import_git_tree(repo, mapping, path, tree, inv, parent_invs, lookup_object):
98
# See if this has changed at all
103
base_sha = base_inv_shamap.lookup_blob(file_id, base_ie.revision)
107
if (base_sha == hexsha and base_ie.executable == ie.executable
108
and base_ie.kind == ie.kind):
109
# If nothing has changed since the base revision, we're done
111
if base_sha == hexsha and base_ie.kind == ie.kind:
112
ie.text_size = base_ie.text_size
113
ie.text_sha1 = base_ie.text_sha1
114
ie.symlink_target = base_ie.symlink_target
115
if ie.executable == base_ie.executable:
116
ie.revision = base_ie.revision
118
blob = lookup_object(hexsha)
120
blob = lookup_object(hexsha)
121
if ie.kind == "symlink":
123
ie.symlink_target = blob.data
127
ie.text_size = len(blob.data)
128
ie.text_sha1 = osutils.sha_string(blob.data)
129
# Check what revision we should store
131
for pinv in parent_invs:
132
if pinv.revision_id == base_inv.revision_id:
141
if pie.text_sha1 == ie.text_sha1 and pie.executable == ie.executable and pie.symlink_target == ie.symlink_target:
142
# found a revision in one of the parents to use
143
ie.revision = pie.revision
145
parent_keys.append((file_id, pie.revision))
146
if ie.revision is None:
147
# Need to store a new revision
148
ie.revision = revision_id
149
assert file_id is not None
150
assert ie.revision is not None
151
if ie.kind == 'symlink':
155
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, data)])
156
shamap = { ie.file_id: hexsha }
158
if base_ie is not None:
159
old_path = base_inv.id2path(file_id)
160
if base_ie.kind == "directory":
161
invdelta.extend(remove_disappeared_children(old_path, base_ie.children, []))
164
invdelta.append((old_path, path, file_id, ie))
165
return (invdelta, shamap)
168
class SubmodulesRequireSubtrees(BzrError):
169
_fmt = """The repository you are fetching from contains submodules. To continue, upgrade your Bazaar repository to a format that supports nested trees, such as 'development-subtree'."""
173
def import_git_submodule(texts, mapping, path, hexsha, base_inv, base_ie,
174
parent_id, revision_id, parent_invs, lookup_object):
175
file_id = mapping.generate_file_id(path)
176
ie = TreeReference(file_id, urlutils.basename(path.decode("utf-8")),
178
ie.revision = revision_id
183
if (base_ie.kind == ie.kind and
184
base_ie.reference_revision == ie.reference_revision):
185
ie.revision = base_ie.revision
186
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
187
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
188
invdelta = [(oldpath, path, file_id, ie)]
189
return invdelta, {}, {}
192
def remove_disappeared_children(path, base_children, existing_children):
194
deletable = [(osutils.pathjoin(path, k), v) for k,v in base_children.iteritems() if k not in existing_children]
196
(path, ie) = deletable.pop()
197
ret.append((path, None, ie.file_id, None))
198
if ie.kind == "directory":
199
for name, child_ie in ie.children.iteritems():
200
deletable.append((osutils.pathjoin(path, name), child_ie))
204
def import_git_tree(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
205
base_ie, parent_id, revision_id, parent_invs, lookup_object,
206
allow_submodules=False):
92
207
"""Import a git tree object into a bzr repository.
94
:param repo: A Bzr repository object
209
:param texts: VersionedFiles object to add to
95
210
:param path: Path in the tree
96
211
:param tree: A git tree object
97
:param inv: Inventory object
212
:param base_inv: Base inventory against which to return inventory delta
213
:return: Inventory delta for this subtree
99
216
file_id = mapping.generate_file_id(path)
100
text_revision = inv.revision_id
101
repo.texts.add_lines((file_id, text_revision),
102
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
104
ie = inv.add_path(path, "directory", file_id)
105
ie.revision = text_revision
106
for mode, name, hexsha in tree.entries():
107
entry_kind = (mode & 0700000) / 0100000
217
# We just have to hope this is indeed utf-8:
218
ie = InventoryDirectory(file_id, urlutils.basename(path.decode("utf-8")),
221
# Newly appeared here
222
ie.revision = revision_id
223
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
224
invdelta.append((None, path, file_id, ie))
226
# See if this has changed at all
228
base_sha = base_inv_shamap.lookup_tree(file_id)
229
except (KeyError, NotImplementedError):
232
if base_sha == hexsha:
233
# If nothing has changed since the base revision, we're done
235
if base_ie.kind != "directory":
236
ie.revision = revision_id
237
texts.insert_record_stream([FulltextContentFactory((ie.file_id, ie.revision), (), None, "")])
238
invdelta.append((base_inv.id2path(ie.file_id), path, ie.file_id, ie))
239
if base_ie is not None and base_ie.kind == "directory":
240
base_children = base_ie.children
243
# Remember for next time
244
existing_children = set()
247
tree = lookup_object(hexsha)
248
for mode, name, child_hexsha in tree.entries():
108
249
basename = name.decode("utf-8")
112
child_path = urlutils.join(path, name)
114
tree = lookup_object(hexsha)
115
import_git_tree(repo, mapping, child_path, tree, inv, parent_invs, lookup_object)
116
elif entry_kind == 1:
117
blob = lookup_object(hexsha)
118
fs_mode = mode & 0777
119
import_git_blob(repo, mapping, child_path, blob, inv, parent_invs, bool(fs_mode & 0111))
121
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
124
def import_git_objects(repo, mapping, object_iter, pb=None):
250
existing_children.add(basename)
251
child_path = osutils.pathjoin(path, name)
252
if stat.S_ISDIR(mode):
253
subinvdelta, grandchildmodes, subshamap = import_git_tree(
254
texts, mapping, child_path, child_hexsha, base_inv,
255
base_inv_shamap, base_children.get(basename), file_id,
256
revision_id, parent_invs, lookup_object,
257
allow_submodules=allow_submodules)
258
elif S_ISGITLINK(mode): # submodule
259
if not allow_submodules:
260
raise SubmodulesRequireSubtrees()
261
subinvdelta, grandchildmodes, subshamap = import_git_submodule(
262
texts, mapping, child_path, child_hexsha, base_inv, base_children.get(basename),
263
file_id, revision_id, parent_invs, lookup_object)
265
subinvdelta, subshamap = import_git_blob(texts, mapping,
266
child_path, child_hexsha, base_inv, base_inv_shamap,
267
base_children.get(basename), file_id,
268
revision_id, parent_invs, lookup_object,
269
mode_is_executable(mode), stat.S_ISLNK(mode))
271
child_modes.update(grandchildmodes)
272
invdelta.extend(subinvdelta)
273
shamap.update(subshamap)
274
if mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
275
stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
276
child_modes[child_path] = mode
277
# Remove any children that have disappeared
278
if base_ie is not None and base_ie.kind == "directory":
279
invdelta.extend(remove_disappeared_children(base_inv.id2path(file_id),
280
base_children, existing_children))
281
shamap[file_id] = hexsha
282
return invdelta, child_modes, shamap
285
def import_git_commit(repo, mapping, head, lookup_object,
286
target_git_object_retriever, parent_invs_cache):
287
o = lookup_object(head)
288
rev = mapping.import_commit(o)
289
# We have to do this here, since we have to walk the tree and
290
# we need to make sure to import the blobs / trees with the right
291
# path; this may involve adding them more than once.
292
parent_invs = parent_invs_cache.get_inventories(rev.parent_ids)
293
if parent_invs == []:
294
base_inv = Inventory(root_id=None)
296
base_inv_shamap = None # Should never be accessed
298
base_inv = parent_invs[0]
299
base_ie = base_inv.root
300
base_inv_shamap = target_git_object_retriever._idmap.get_inventory_sha_map(base_inv.revision_id)
301
inv_delta, unusual_modes, shamap = import_git_tree(repo.texts,
302
mapping, "", o.tree, base_inv, base_inv_shamap, base_ie, None,
303
rev.revision_id, parent_invs, lookup_object,
304
allow_submodules=getattr(repo._format, "supports_tree_reference", False))
306
for (oldpath, newpath, fileid, new_ie) in inv_delta:
308
entries.append((fileid, None, None, None))
310
if new_ie.kind in ("file", "symlink"):
311
entries.append((fileid, "blob", shamap[fileid], new_ie.revision))
312
elif new_ie.kind == "directory":
313
entries.append((fileid, "tree", shamap[fileid], rev.revision_id))
316
target_git_object_retriever._idmap.add_entries(rev.revision_id,
317
rev.parent_ids, head, o.tree, entries)
318
if unusual_modes != {}:
319
for path, mode in unusual_modes.iteritems():
320
warn_unusual_mode(rev.foreign_revid, path, mode)
321
mapping.import_unusual_file_modes(rev, unusual_modes)
323
basis_id = rev.parent_ids[0]
325
basis_id = NULL_REVISION
327
rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
328
inv_delta, rev.revision_id, rev.parent_ids,
330
parent_invs_cache.add(rev.revision_id, inv)
331
repo.add_revision(rev.revision_id, rev)
332
if "verify" in debug.debug_flags:
333
new_unusual_modes = mapping.export_unusual_file_modes(rev)
334
if new_unusual_modes != unusual_modes:
335
raise AssertionError("unusual modes don't match: %r != %r" % (unusual_modes, new_unusual_modes))
336
objs = inventory_to_tree_and_blobs(inv, repo.texts, mapping, unusual_modes)
337
for newsha1, newobj, path in objs:
338
assert path is not None
342
(oldmode, oldsha1) = tree_lookup_path(lookup_object, o.tree, path)
343
if oldsha1 != newsha1:
344
raise AssertionError("%r != %r in %s" % (oldsha1, newsha1, path))
347
def import_git_objects(repo, mapping, object_iter,
348
target_git_object_retriever, heads, pb=None, limit=None):
125
349
"""Import a set of git objects into a bzr repository.
127
:param repo: Bazaar repository
351
:param repo: Target Bazaar repository
128
352
:param mapping: Mapping to use
129
353
:param object_iter: Iterator over Git objects.
354
:return: Tuple with pack hints and last imported revision id
356
def lookup_object(sha):
358
return object_iter[sha]
360
return target_git_object_retriever[sha]
131
361
# TODO: a more (memory-)efficient implementation of this
133
for i, o in enumerate(object_iter):
135
pb.update("fetching objects", i)
364
heads = list(set(heads))
365
parent_invs_cache = LRUInventoryCache(repo)
366
target_git_object_retriever.start_write_group() # FIXME: try/finally
140
367
# Find and convert commit objects
141
for o in objects.itervalues():
370
pb.update("finding revisions to fetch", len(graph), None)
372
assert isinstance(head, str)
374
o = lookup_object(head)
142
377
if isinstance(o, Commit):
143
378
rev = mapping.import_commit(o)
144
root_trees[rev.revision_id] = objects[o.tree]
145
revisions[rev.revision_id] = rev
146
graph.append((rev.revision_id, rev.parent_ids))
379
if repo.has_revision(rev.revision_id):
381
squash_revision(repo, rev)
382
graph.append((o.id, o.parents))
383
heads.extend([p for p in o.parents if p not in checked])
384
elif isinstance(o, Tag):
385
if o.object[1] not in checked:
386
heads.append(o.object[1])
388
trace.warning("Unable to import head object %r" % o)
147
391
# Order the revisions
148
392
# Create the inventory objects
149
for i, revid in enumerate(topo_sort(graph)):
151
pb.update("fetching revisions", i, len(graph))
152
root_tree = root_trees[revid]
153
rev = revisions[revid]
154
# We have to do this here, since we have to walk the tree and
155
# we need to make sure to import the blobs / trees with the riht
156
# path; this may involve adding them more than once.
158
inv.revision_id = rev.revision_id
159
def lookup_object(sha):
162
return reconstruct_git_object(repo, mapping, sha)
163
parent_invs = [repo.get_inventory(r) for r in rev.parent_ids]
164
import_git_tree(repo, mapping, "", root_tree, inv, parent_invs, lookup_object)
165
repo.add_revision(rev.revision_id, rev, inv)
168
def reconstruct_git_commit(repo, rev):
169
raise NotImplementedError(self.reconstruct_git_commit)
172
def reconstruct_git_object(repo, mapping, sha):
174
revid = mapping.revision_id_foreign_to_bzr(sha)
176
rev = repo.get_revision(revid)
177
except NoSuchRevision:
180
return reconstruct_git_commit(rev)
184
raise KeyError("No such object %s" % sha)
394
revision_ids = topo_sort(graph)
396
if limit is not None:
397
revision_ids = revision_ids[:limit]
399
for offset in range(0, len(revision_ids), batch_size):
400
repo.start_write_group()
402
for i, head in enumerate(revision_ids[offset:offset+batch_size]):
404
pb.update("fetching revisions", offset+i, len(revision_ids))
405
import_git_commit(repo, mapping, head, lookup_object,
406
target_git_object_retriever,
410
repo.abort_write_group()
413
hint = repo.commit_write_group()
415
pack_hints.extend(hint)
416
target_git_object_retriever.commit_write_group()
417
return pack_hints, last_imported
187
420
class InterGitRepository(InterRepository):
189
_matching_repo_format = GitFormat()
422
_matching_repo_format = GitRepositoryFormat()
192
425
def _get_repo_format_to_test():
196
429
"""See InterRepository.copy_content."""
197
430
self.fetch(revision_id, pb, find_ghosts=False)
199
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
432
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
433
mapping=None, fetch_spec=None):
434
self.fetch_refs(revision_id=revision_id, pb=pb,
435
find_ghosts=find_ghosts, mapping=mapping, fetch_spec=fetch_spec)
438
class InterGitNonGitRepository(InterGitRepository):
439
"""Base InterRepository that copies revisions from a Git into a non-Git
442
def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
443
mapping=None, fetch_spec=None):
201
444
if mapping is None:
202
445
mapping = self.source.get_mapping()
204
pb.note("git: %s", text)
205
def determine_wants(heads):
206
if revision_id is None:
446
if revision_id is not None:
447
interesting_heads = [revision_id]
448
elif fetch_spec is not None:
449
interesting_heads = fetch_spec.heads
451
interesting_heads = None
453
def determine_wants(refs):
455
if interesting_heads is None:
456
ret = [sha for (ref, sha) in refs.iteritems() if not ref.endswith("^{}")]
209
ret = [mapping.revision_id_bzr_to_foreign(revision_id)]
458
ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
210
459
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
211
graph_walker = BzrFetchGraphWalker(self.target, mapping)
460
(pack_hint, _) = self.fetch_objects(determine_wants, mapping, pb)
461
if pack_hint is not None and self.target._format.pack_compresses:
462
self.target.pack(hint=pack_hint)
463
if interesting_heads is not None:
464
present_interesting_heads = self.target.has_revisions(interesting_heads)
465
missing_interesting_heads = set(interesting_heads) - present_interesting_heads
466
if missing_interesting_heads:
467
raise AssertionError("Missing interesting heads: %r" % missing_interesting_heads)
471
_GIT_PROGRESS_RE = re.compile(r"(.*?): +(\d+)% \((\d+)/(\d+)\)")
472
def report_git_progress(pb, text):
473
text = text.rstrip("\r\n")
474
g = _GIT_PROGRESS_RE.match(text)
476
(text, pct, current, total) = g.groups()
477
pb.update(text, int(current), int(total))
479
pb.update(text, 0, 0)
482
class InterRemoteGitNonGitRepository(InterGitNonGitRepository):
483
"""InterRepository that copies revisions from a remote Git into a non-Git
486
def get_target_heads(self):
487
# FIXME: This should be more efficient
488
all_revs = self.target.all_revision_ids()
489
parent_map = self.target.get_parent_map(all_revs)
491
map(all_parents.update, parent_map.itervalues())
492
return set(all_revs) - all_parents
494
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
496
report_git_progress(pb, text)
497
store = BazaarObjectStore(self.target, mapping)
498
self.target.lock_write()
500
heads = self.get_target_heads()
501
graph_walker = store.get_graph_walker(
502
[store._lookup_revision_sha1(head) for head in heads])
505
def record_determine_wants(heads):
506
wants = determine_wants(heads)
507
recorded_wants.extend(wants)
512
create_pb = pb = ui.ui_factory.nested_progress_bar()
514
objects_iter = self.source.fetch_objects(
515
record_determine_wants, graph_walker,
516
store.get_raw, progress)
517
return import_git_objects(self.target, mapping,
518
objects_iter, store, recorded_wants, pb, limit)
526
def is_compatible(source, target):
527
"""Be compatible with GitRepository."""
528
# FIXME: Also check target uses VersionedFile
529
return (isinstance(source, RemoteGitRepository) and
530
target.supports_rich_root() and
531
not isinstance(target, GitRepository))
534
class InterLocalGitNonGitRepository(InterGitNonGitRepository):
535
"""InterRepository that copies revisions from a local Git into a non-Git
538
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
541
wants = determine_wants(self.source._git.get_refs())
214
544
create_pb = pb = ui.ui_factory.nested_progress_bar()
545
target_git_object_retriever = BazaarObjectStore(self.target, mapping)
216
547
self.target.lock_write()
218
self.target.start_write_group()
220
import_git_objects(self.target, mapping,
221
iter(self.source.fetch_objects(determine_wants, graph_walker,
224
self.target.commit_write_group()
549
return import_git_objects(self.target, mapping,
550
self.source._git.object_store,
551
target_git_object_retriever, wants, pb, limit)
226
553
self.target.unlock()