# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib import osutils
from bzrlib.errors import InvalidRevisionId
from bzrlib.inventory import Inventory
from bzrlib.repository import InterRepository
from bzrlib.trace import info
from bzrlib.plugins.git import git
from bzrlib.plugins.git.repository import LocalGitRepository, GitRepository, GitFormat
from bzrlib.plugins.git.remote import RemoteGitRepository
from dulwich.objects import Commit
from cStringIO import StringIO
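
# This module implements fetching from git: git commits, trees and blobs are
# converted into bzr revisions, inventories and texts.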

class BzrFetchGraphWalker(object):

    def __init__(self, repository, mapping):
        self.repository = repository
        self.mapping = mapping
        self.done = set()
        self.parents = {}
        self.heads = set(repository.all_revision_ids())

    def ack(self, sha):
        revid = self.mapping.revision_id_foreign_to_bzr(sha)
        self.remove(revid)

    def remove(self, revid):
        self.done.add(revid)
        if revid in self.heads:
            self.heads.remove(revid)
        if revid in self.parents:
            for p in self.parents[revid]:
                self.remove(p)

    def next(self):
        while self.heads:
            ret = self.heads.pop()
            ps = self.repository.get_parent_map([ret])[ret]
            self.parents[ret] = ps
            self.heads.update([p for p in ps if not p in self.done])
            try:
                return self.mapping.revision_id_bzr_to_foreign(ret)
            except InvalidRevisionId:
                continue
        return None


import re
import stat

from dulwich.objects import (Commit, S_ISGITLINK, Tag)
from dulwich.object_store import (tree_lookup_path)

from bzrlib import (debug, trace, ui, urlutils)
from bzrlib.errors import (BzrError, NoSuchRevision)
from bzrlib.inventory import (InventoryDirectory, InventoryFile, InventoryLink,
    TreeReference)
from bzrlib.repository import (InterRepository)
from bzrlib.revision import (NULL_REVISION)
from bzrlib.tsort import (topo_sort)
from bzrlib.versionedfile import (FulltextContentFactory)

from bzrlib.plugins.git.mapping import (DEFAULT_FILE_MODE,
    inventory_to_tree_and_blobs, mode_is_executable, squash_revision,
    warn_unusual_mode)
from bzrlib.plugins.git.object_store import (BazaarObjectStore,
    LRUInventoryCache)
from bzrlib.plugins.git.remote import (RemoteGitRepository)
from bzrlib.plugins.git.repository import (GitRepository, GitRepositoryFormat,
    LocalGitRepository)
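
# Each of the import_git_* helpers below returns an inventory delta against
# the base inventory (plus a map from file ids to git SHAs), rather than
# building a full inventory for every imported revision.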
def import_git_blob(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
        base_ie, parent_id, revision_id, parent_invs, lookup_object,
        executable, symlink):
    """Import a git blob object into a bzr repository.

    :param texts: VersionedFiles to add to
    :param path: Path in the tree
    :param hexsha: SHA1 of the git blob
    :return: Inventory delta for this file
    """
    file_id = mapping.generate_file_id(path)
    if symlink:
        cls = InventoryLink
    else:
        cls = InventoryFile
    # We just have to hope this is indeed utf-8:
    ie = cls(file_id, urlutils.basename(path).decode("utf-8"), parent_id)
    ie.executable = executable
    # See if this has changed at all
    base_sha = None
    if base_ie is not None:
        try:
            base_sha = base_inv_shamap.lookup_blob(file_id, base_ie.revision)
        except (KeyError, NotImplementedError):
            base_sha = None
    if (base_sha == hexsha and base_ie.executable == ie.executable
        and base_ie.kind == ie.kind):
        # If nothing has changed since the base revision, we're done
        return [], {}
    if base_sha == hexsha and base_ie.kind == ie.kind:
        # Same contents as in the base revision; reuse its text metadata
        ie.text_size = base_ie.text_size
        ie.text_sha1 = base_ie.text_sha1
        ie.symlink_target = base_ie.symlink_target
        if ie.executable == base_ie.executable:
            ie.revision = base_ie.revision
        blob = lookup_object(hexsha)
    else:
        blob = lookup_object(hexsha)
        if ie.kind == "symlink":
            ie.text_size = None
            ie.text_sha1 = None
            ie.symlink_target = blob.data
        else:
            ie.text_size = len(blob.data)
            ie.text_sha1 = osutils.sha_string(blob.data)
    # Check what revision we should store
    parent_keys = []
    for pinv in parent_invs:
        if pinv.revision_id == base_inv.revision_id:
            continue
        if file_id not in pinv:
            continue
        pie = pinv[file_id]
        if pie.text_sha1 == ie.text_sha1 and pie.executable == ie.executable and pie.symlink_target == ie.symlink_target:
            # found a revision in one of the parents to use
            ie.revision = pie.revision
            break
        parent_keys.append((file_id, pie.revision))
    if ie.revision is None:
        # Need to store a new revision
        ie.revision = revision_id
        assert file_id is not None
        assert ie.revision is not None
        if ie.kind == 'symlink':
            data = ""
        else:
            data = blob.data
        texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, data)])
    shamap = { ie.file_id: hexsha }
    invdelta = []
    if base_ie is not None:
        old_path = base_inv.id2path(file_id)
        if base_ie.kind == "directory":
            invdelta.extend(remove_disappeared_children(old_path, base_ie.children, []))
    else:
        old_path = None
    invdelta.append((old_path, path, file_id, ie))
    return (invdelta, shamap)


class SubmodulesRequireSubtrees(BzrError):
    _fmt = """The repository you are fetching from contains submodules. To continue, upgrade your Bazaar repository to a format that supports nested trees, such as 'development-subtree'."""
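
# Git submodules are represented in bzr as tree references, which only
# nested-tree-capable repository formats can store.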
def import_git_submodule(texts, mapping, path, hexsha, base_inv, base_ie,
        parent_id, revision_id, parent_invs, lookup_object):
    file_id = mapping.generate_file_id(path)
    ie = TreeReference(file_id, urlutils.basename(path).decode("utf-8"),
        parent_id)
    ie.revision = revision_id
    ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
    if base_ie is None:
        oldpath = None
    else:
        oldpath = base_inv.id2path(file_id)
        if (base_ie.kind == ie.kind and
            base_ie.reference_revision == ie.reference_revision):
            ie.revision = base_ie.revision
    texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
    invdelta = [(oldpath, path, file_id, ie)]
    return invdelta, {}, {}
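
# Entries that exist in the base tree but no longer appear in the git tree
# being imported are turned into deletions in the inventory delta; directories
# are walked recursively so their children are removed as well.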
def remove_disappeared_children(path, base_children, existing_children):
    ret = []
    deletable = [(osutils.pathjoin(path, k), v) for k, v in base_children.iteritems()
                 if k not in existing_children]
    while deletable:
        (path, ie) = deletable.pop()
        ret.append((path, None, ie.file_id, None))
        if ie.kind == "directory":
            for name, child_ie in ie.children.iteritems():
                deletable.append((osutils.pathjoin(path, name), child_ie))
    return ret
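
# Importing a tree recurses into its entries: subtrees, submodules and blobs
# are each handled by the matching import_git_* helper, and any unusual file
# modes are collected so they can be recorded on the revision.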
def import_git_tree(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
        base_ie, parent_id, revision_id, parent_invs, lookup_object,
        allow_submodules=False):
    """Import a git tree object into a bzr repository.

    :param texts: VersionedFiles object to add to
    :param path: Path in the tree
    :param hexsha: SHA1 of the git tree object
    :param base_inv: Base inventory against which to return inventory delta
    :return: Inventory delta for this subtree
    """
    file_id = mapping.generate_file_id(path)
    # We just have to hope this is indeed utf-8:
    ie = InventoryDirectory(file_id, urlutils.basename(path.decode("utf-8")),
        parent_id)
    invdelta = []
    if base_ie is None:
        # Newly appeared here
        ie.revision = revision_id
        texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
        invdelta.append((None, path, file_id, ie))
    else:
        # See if this has changed at all
        try:
            base_sha = base_inv_shamap.lookup_tree(file_id)
        except (KeyError, NotImplementedError):
            base_sha = None
        if base_sha == hexsha:
            # If nothing has changed since the base revision, we're done
            return [], {}, {}
        if base_ie.kind != "directory":
            ie.revision = revision_id
            texts.insert_record_stream([FulltextContentFactory((ie.file_id, ie.revision), (), None, "")])
            invdelta.append((base_inv.id2path(ie.file_id), path, ie.file_id, ie))
    if base_ie is not None and base_ie.kind == "directory":
        base_children = base_ie.children
    else:
        base_children = {}
    # Remember for next time
    existing_children = set()
    child_modes = {}
    shamap = {}
    tree = lookup_object(hexsha)
    for mode, name, child_hexsha in tree.entries():
        basename = name.decode("utf-8")
        existing_children.add(basename)
        child_path = osutils.pathjoin(path, name)
        if stat.S_ISDIR(mode):
            subinvdelta, grandchildmodes, subshamap = import_git_tree(
                texts, mapping, child_path, child_hexsha, base_inv,
                base_inv_shamap, base_children.get(basename), file_id,
                revision_id, parent_invs, lookup_object,
                allow_submodules=allow_submodules)
        elif S_ISGITLINK(mode): # submodule
            if not allow_submodules:
                raise SubmodulesRequireSubtrees()
            subinvdelta, grandchildmodes, subshamap = import_git_submodule(
                texts, mapping, child_path, child_hexsha, base_inv, base_children.get(basename),
                file_id, revision_id, parent_invs, lookup_object)
        else:
            subinvdelta, subshamap = import_git_blob(texts, mapping,
                child_path, child_hexsha, base_inv, base_inv_shamap,
                base_children.get(basename), file_id,
                revision_id, parent_invs, lookup_object,
                mode_is_executable(mode), stat.S_ISLNK(mode))
            grandchildmodes = {}
        child_modes.update(grandchildmodes)
        invdelta.extend(subinvdelta)
        shamap.update(subshamap)
        if mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
            stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
            child_modes[child_path] = mode
    # Remove any children that have disappeared
    if base_ie is not None and base_ie.kind == "directory":
        invdelta.extend(remove_disappeared_children(base_inv.id2path(file_id),
            base_children, existing_children))
    shamap[file_id] = hexsha
    return invdelta, child_modes, shamap
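
# A commit is imported by converting its tree into an inventory delta against
# the first parent's inventory, registering the new file texts, and then
# adding the resulting inventory and revision to the target repository.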
def import_git_commit(repo, mapping, head, lookup_object,
        target_git_object_retriever, parent_invs_cache):
    o = lookup_object(head)
    rev = mapping.import_commit(o)
    # We have to do this here, since we have to walk the tree and
    # we need to make sure to import the blobs / trees with the right
    # path; this may involve adding them more than once.
    parent_invs = parent_invs_cache.get_inventories(rev.parent_ids)
    if parent_invs == []:
        base_inv = Inventory(root_id=None)
        base_ie = None
        base_inv_shamap = None # Should never be accessed
    else:
        base_inv = parent_invs[0]
        base_ie = base_inv.root
        base_inv_shamap = target_git_object_retriever._idmap.get_inventory_sha_map(base_inv.revision_id)
    inv_delta, unusual_modes, shamap = import_git_tree(repo.texts,
        mapping, "", o.tree, base_inv, base_inv_shamap, base_ie, None,
        rev.revision_id, parent_invs, lookup_object,
        allow_submodules=getattr(repo._format, "supports_tree_reference", False))
    entries = []
    for (oldpath, newpath, fileid, new_ie) in inv_delta:
        if newpath is None:
            entries.append((fileid, None, None, None))
        elif new_ie.kind in ("file", "symlink"):
            entries.append((fileid, "blob", shamap[fileid], new_ie.revision))
        elif new_ie.kind == "directory":
            entries.append((fileid, "tree", shamap[fileid], rev.revision_id))
    target_git_object_retriever._idmap.add_entries(rev.revision_id,
        rev.parent_ids, head, o.tree, entries)
    if unusual_modes != {}:
        for path, mode in unusual_modes.iteritems():
            warn_unusual_mode(rev.foreign_revid, path, mode)
        mapping.import_unusual_file_modes(rev, unusual_modes)
    if rev.parent_ids:
        basis_id = rev.parent_ids[0]
    else:
        basis_id = NULL_REVISION
    rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
        inv_delta, rev.revision_id, rev.parent_ids)
    parent_invs_cache.add(rev.revision_id, inv)
    repo.add_revision(rev.revision_id, rev)
    if "verify" in debug.debug_flags:
        new_unusual_modes = mapping.export_unusual_file_modes(rev)
        if new_unusual_modes != unusual_modes:
            raise AssertionError("unusual modes don't match: %r != %r" % (unusual_modes, new_unusual_modes))
        objs = inventory_to_tree_and_blobs(inv, repo.texts, mapping, unusual_modes)
        for newsha1, newobj, path in objs:
            assert path is not None
            (oldmode, oldsha1) = tree_lookup_path(lookup_object, o.tree, path)
            if oldsha1 != newsha1:
                raise AssertionError("%r != %r in %s" % (oldsha1, newsha1, path))
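
# Imports are done in batches, each wrapped in a repository write group, so
# that a failure does not leave a partially written batch behind.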
def import_git_objects(repo, mapping, object_iter,
        target_git_object_retriever, heads, pb=None, limit=None):
    """Import a set of git objects into a bzr repository.

    :param repo: Target Bazaar repository
    :param mapping: Mapping to use
    :param object_iter: Iterator over Git objects.
    :return: Tuple with pack hints and last imported revision id
    """
    def lookup_object(sha):
        try:
            return object_iter[sha]
        except KeyError:
            return target_git_object_retriever[sha]
    # TODO: a more (memory-)efficient implementation of this
    heads = list(set(heads))
    parent_invs_cache = LRUInventoryCache(repo)
    target_git_object_retriever.start_write_group() # FIXME: try/finally
    # Find and convert commit objects
    graph = []
    checked = set()
    while heads:
        pb.update("finding revisions to fetch", len(graph), None)
        head = heads.pop()
        assert isinstance(head, str)
        o = lookup_object(head)
        if isinstance(o, Commit):
            rev = mapping.import_commit(o)
            if repo.has_revision(rev.revision_id):
                continue
            squash_revision(repo, rev)
            graph.append((o.id, o.parents))
            heads.extend([p for p in o.parents if p not in checked])
        elif isinstance(o, Tag):
            if o.object[1] not in checked:
                heads.append(o.object[1])
        else:
            trace.warning("Unable to import head object %r" % o)
        checked.add(head)
    # Order the revisions
    revision_ids = topo_sort(graph)
    pack_hints = []
    if limit is not None:
        revision_ids = revision_ids[:limit]
    last_imported = None
    batch_size = 100 # number of revisions per write group (assumed value)
    # Create the inventory objects
    for offset in range(0, len(revision_ids), batch_size):
        repo.start_write_group()
        try:
            for i, head in enumerate(revision_ids[offset:offset+batch_size]):
                pb.update("fetching revisions", offset+i, len(revision_ids))
                import_git_commit(repo, mapping, head, lookup_object,
                    target_git_object_retriever, parent_invs_cache)
                last_imported = head
        except:
            repo.abort_write_group()
            raise
        else:
            hint = repo.commit_write_group()
            if hint is not None:
                pack_hints.extend(hint)
    target_git_object_retriever.commit_write_group()
    return pack_hints, last_imported
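
# The InterRepository implementations below plug the conversion code into
# bzr's fetch machinery; which one is used depends on whether the source is a
# local or remote git repository and whether the target is a git repository.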
class InterGitRepository(InterRepository):

    _matching_repo_format = GitRepositoryFormat()

    @staticmethod
    def _get_repo_format_to_test():
        return None

    def copy_content(self, revision_id=None, pb=None):
        """See InterRepository.copy_content."""
        self.fetch(revision_id, pb, find_ghosts=False)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            mapping=None, fetch_spec=None):
        self.fetch_refs(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts, mapping=mapping, fetch_spec=fetch_spec)
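
# When fetching into a non-git repository, the advertised git refs are mapped
# to bzr revision ids and anything the target already has is filtered out of
# the requested heads.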
class InterGitNonGitRepository(InterGitRepository):
    """Base InterRepository that copies revisions from a Git into a non-Git
    repository."""

    def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
            mapping=None, fetch_spec=None):
        if mapping is None:
            mapping = self.source.get_mapping()
        if revision_id is not None:
            interesting_heads = [revision_id]
        elif fetch_spec is not None:
            interesting_heads = fetch_spec.heads
        else:
            interesting_heads = None

        def determine_wants(refs):
            if interesting_heads is None:
                ret = [sha for (ref, sha) in refs.iteritems() if not ref.endswith("^{}")]
            else:
                ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
            return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]

        (pack_hint, _) = self.fetch_objects(determine_wants, mapping, pb)
        if pack_hint is not None and self.target._format.pack_compresses:
            self.target.pack(hint=pack_hint)
        if interesting_heads is not None:
            present_interesting_heads = self.target.has_revisions(interesting_heads)
            missing_interesting_heads = set(interesting_heads) - present_interesting_heads
            if missing_interesting_heads:
                raise AssertionError("Missing interesting heads: %r" % missing_interesting_heads)
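
# Progress lines reported by git look like "Counting objects:  10% (1/10)";
# parse them so they can be forwarded to bzr's progress bar.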
_GIT_PROGRESS_RE = re.compile(r"(.*?): +(\d+)% \((\d+)/(\d+)\)")


def report_git_progress(pb, text):
    text = text.rstrip("\r\n")
    g = _GIT_PROGRESS_RE.match(text)
    if g is not None:
        (text, pct, current, total) = g.groups()
        pb.update(text, int(current), int(total))
    else:
        pb.update(text, 0, 0)


class InterRemoteGitNonGitRepository(InterGitNonGitRepository):
    """InterRepository that copies revisions from a remote Git into a non-Git
    repository."""

    def get_target_heads(self):
        # FIXME: This should be more efficient
        all_revs = self.target.all_revision_ids()
        parent_map = self.target.get_parent_map(all_revs)
        all_parents = set()
        map(all_parents.update, parent_map.itervalues())
        return set(all_revs) - all_parents
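
    # The graph walker advertises revisions that already exist in the target,
    # so the remote side only has to send the objects we are missing.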
    def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
        def progress(text):
            report_git_progress(pb, text)
        store = BazaarObjectStore(self.target, mapping)
        self.target.lock_write()
        try:
            heads = self.get_target_heads()
            graph_walker = store.get_graph_walker(
                [store._lookup_revision_sha1(head) for head in heads])
            recorded_wants = []

            def record_determine_wants(heads):
                wants = determine_wants(heads)
                recorded_wants.extend(wants)
                return wants

            if pb is None:
                create_pb = pb = ui.ui_factory.nested_progress_bar()
            else:
                create_pb = None
            try:
                objects_iter = self.source.fetch_objects(
                    record_determine_wants, graph_walker,
                    store.get_raw, progress)
                return import_git_objects(self.target, mapping,
                    objects_iter, store, recorded_wants, pb, limit)
            finally:
                if create_pb is not None:
                    create_pb.finished()
        finally:
            self.target.unlock()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with GitRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, RemoteGitRepository) and
            target.supports_rich_root() and
            not isinstance(target, GitRepository))


class InterLocalGitNonGitRepository(InterGitNonGitRepository):
    """InterRepository that copies revisions from a local Git into a non-Git
    repository."""

    def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
        wants = determine_wants(self.source._git.get_refs())
        if pb is None:
            create_pb = pb = ui.ui_factory.nested_progress_bar()
        else:
            create_pb = None
        target_git_object_retriever = BazaarObjectStore(self.target, mapping)
        try:
            self.target.lock_write()
            try:
                return import_git_objects(self.target, mapping,
                    self.source._git.object_store,
                    target_git_object_retriever, wants, pb, limit)
            finally:
                self.target.unlock()
        finally:
            if create_pb is not None:
                create_pb.finished()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with GitRepository."""
        # FIXME: Also check target uses VersionedFile
        return (isinstance(source, LocalGitRepository) and
            target.supports_rich_root() and
            not isinstance(target, GitRepository))
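
# Git-to-git fetches bypass the bzr conversion entirely and let dulwich copy
# objects directly, receiving a thin pack when fetching from a remote.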
class InterGitGitRepository(InterGitRepository):
    """InterRepository that copies between Git repositories."""

    def fetch_objects(self, determine_wants, mapping, pb=None):
        def progress(text):
            trace.note("git: %s", text)
        graphwalker = self.target._git.get_graph_walker()
        if (isinstance(self.source, LocalGitRepository) and
            isinstance(self.target, LocalGitRepository)):
            return self.source._git.fetch(self.target._git, determine_wants,
                progress)
        elif (isinstance(self.source, LocalGitRepository) and
            isinstance(self.target, RemoteGitRepository)):
            raise NotImplementedError
        elif (isinstance(self.source, RemoteGitRepository) and
            isinstance(self.target, LocalGitRepository)):
            f, commit = self.target._git.object_store.add_thin_pack()
            refs = self.source._git.fetch_pack(determine_wants,
                graphwalker, f.write, progress)
            commit()
            return refs

    def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
            mapping=None, fetch_spec=None, branches=None):
        if mapping is None:
            mapping = self.source.get_mapping()
        r = self.target._git
        if revision_id is not None:
            args = [mapping.revision_id_bzr_to_foreign(revision_id)[0]]
        elif fetch_spec is not None:
            args = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in fetch_spec.heads]
        if branches is not None:
            determine_wants = lambda x: [x[y] for y in branches if not x[y] in r.object_store]
        elif fetch_spec is None and revision_id is None:
            determine_wants = r.object_store.determine_wants_all
        else:
            determine_wants = lambda x: [y for y in args if not y in r.object_store]
        return self.fetch_objects(determine_wants, mapping)[0]

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with GitRepository."""
        return (isinstance(source, GitRepository) and
            isinstance(target, GitRepository))