14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from dulwich.objects import (
23
from dulwich.object_store import (
26
from itertools import (
39
from bzrlib.errors import (
43
from bzrlib.inventory import (
50
from bzrlib.repository import (
53
from bzrlib.revision import (
56
from bzrlib.tsort import (
59
from bzrlib.versionedfile import (
60
ChunkedContentFactory,
63
from bzrlib.plugins.git.mapping import (
65
inventory_to_tree_and_blobs,
71
from bzrlib.plugins.git.object_store import (
75
from bzrlib.plugins.git.remote import (
17
from bzrlib import osutils, ui, urlutils
18
from bzrlib.errors import InvalidRevisionId
19
from bzrlib.inventory import Inventory
20
from bzrlib.repository import InterRepository
21
from bzrlib.trace import info
22
from bzrlib.tsort import topo_sort
78
24
from bzrlib.plugins.git.repository import (
85
def import_git_blob(texts, mapping, path, name, (base_hexsha, hexsha),
86
base_inv, parent_id, revision_id,
87
parent_invs, lookup_object, (base_mode, mode), store_updater):
29
from bzrlib.plugins.git.remote import RemoteGitRepository
32
from dulwich.client import SimpleFetchGraphWalker
33
from dulwich.objects import Commit
35
from cStringIO import StringIO
38
class BzrFetchGraphWalker(object):
39
"""GraphWalker implementation that uses a Bazaar repository."""
41
def __init__(self, repository, mapping):
42
self.repository = repository
43
self.mapping = mapping
45
self.heads = set(repository.all_revision_ids())
49
return iter(self.next, None)
52
revid = self.mapping.revision_id_foreign_to_bzr(sha)
55
def remove(self, revid):
57
if revid in self.heads:
58
self.heads.remove(revid)
59
if revid in self.parents:
60
for p in self.parents[revid]:
65
ret = self.heads.pop()
66
ps = self.repository.get_parent_map([ret])[ret]
67
self.parents[ret] = ps
68
self.heads.update([p for p in ps if not p in self.done])
71
return self.mapping.revision_id_bzr_to_foreign(ret)[0]
72
except InvalidRevisionId:
77
def import_git_blob(repo, mapping, path, blob, inv, parent_invs, executable):
88
78
"""Import a git blob object into a bzr repository.
90
:param texts: VersionedFiles to add to
80
:param repo: bzr repository
91
81
:param path: Path in the tree
92
82
:param blob: A git blob
93
:return: Inventory delta for this file
95
if base_hexsha == hexsha and base_mode == mode:
96
# If nothing has changed since the base revision, we're done
98
file_id = mapping.generate_file_id(path)
99
if stat.S_ISLNK(mode):
103
ie = cls(file_id, name.decode("utf-8"), parent_id)
104
ie.executable = mode_is_executable(mode)
105
if base_hexsha == hexsha and mode_kind(base_mode) == mode_kind(mode):
106
base_ie = base_inv[base_inv.path2id(path)]
107
ie.text_size = base_ie.text_size
108
ie.text_sha1 = base_ie.text_sha1
109
ie.symlink_target = base_ie.symlink_target
110
if ie.executable == base_ie.executable:
111
ie.revision = base_ie.revision
113
blob = lookup_object(hexsha)
115
blob = lookup_object(hexsha)
116
if ie.kind == "symlink":
118
ie.symlink_target = blob.data
122
ie.text_size = sum(imap(len, blob.chunked))
123
ie.text_sha1 = osutils.sha_strings(blob.chunked)
124
# Check what revision we should store
126
for pinv in parent_invs[1:]:
131
if pie.text_sha1 == ie.text_sha1 and pie.executable == ie.executable and pie.symlink_target == ie.symlink_target:
132
# found a revision in one of the parents to use
133
ie.revision = pie.revision
135
parent_keys.append((file_id, pie.revision))
136
if ie.revision is None:
137
# Need to store a new revision
138
ie.revision = revision_id
139
assert ie.revision is not None
140
if ie.kind == 'symlink':
143
chunks = blob.chunked
144
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, chunks)])
146
if base_hexsha is not None:
147
old_path = path # Renames are not supported yet
148
if stat.S_ISDIR(base_mode):
149
invdelta.extend(remove_disappeared_children(base_inv, old_path,
150
lookup_object(base_hexsha), [], lookup_object))
153
invdelta.append((old_path, path, file_id, ie))
154
if base_hexsha != hexsha:
155
store_updater.add_object(blob, ie)
159
class SubmodulesRequireSubtrees(BzrError):
160
_fmt = """The repository you are fetching from contains submodules. To continue, upgrade your Bazaar repository to a format that supports nested trees, such as 'development-subtree'."""
164
def import_git_submodule(texts, mapping, path, name, (base_hexsha, hexsha),
165
base_inv, parent_id, revision_id, parent_invs, lookup_object,
166
(base_mode, mode), store_updater):
167
if base_hexsha == hexsha and base_mode == mode:
169
file_id = mapping.generate_file_id(path)
170
ie = TreeReference(file_id, name.decode("utf-8"), parent_id)
171
ie.revision = revision_id
172
if base_hexsha is None:
176
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
177
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), (), None, [])])
178
invdelta = [(oldpath, path, file_id, ie)]
182
def remove_disappeared_children(base_inv, path, base_tree, existing_children,
185
for name, mode, hexsha in base_tree.iteritems():
186
if name in existing_children:
188
c_path = posixpath.join(path, name.decode("utf-8"))
189
ret.append((c_path, None, base_inv.path2id(c_path), None))
190
if stat.S_ISDIR(mode):
191
ret.extend(remove_disappeared_children(
192
base_inv, c_path, lookup_object(hexsha), [], lookup_object))
196
def import_git_tree(texts, mapping, path, name, (base_hexsha, hexsha),
197
base_inv, parent_id, revision_id, parent_invs,
198
lookup_object, (base_mode, mode), store_updater, allow_submodules=False):
84
file_id = mapping.generate_file_id(path)
85
text_revision = inv.revision_id
86
repo.texts.add_lines((file_id, text_revision),
87
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
88
osutils.split_lines(blob.data))
89
ie = inv.add_path(path, "file", file_id)
90
ie.revision = text_revision
91
ie.text_size = len(blob.data)
92
ie.text_sha1 = osutils.sha_string(blob.data)
93
ie.executable = executable
96
def import_git_tree(repo, mapping, path, tree, inv, parent_invs, lookup_object):
199
97
"""Import a git tree object into a bzr repository.
201
:param texts: VersionedFiles object to add to
99
:param repo: A Bzr repository object
202
100
:param path: Path in the tree
203
101
:param tree: A git tree object
204
:param base_inv: Base inventory against which to return inventory delta
205
:return: Inventory delta for this subtree
102
:param inv: Inventory object
207
if base_hexsha == hexsha and base_mode == mode:
208
# If nothing has changed since the base revision, we're done
211
104
file_id = mapping.generate_file_id(path)
212
# We just have to hope this is indeed utf-8:
213
ie = InventoryDirectory(file_id, name.decode("utf-8"), parent_id)
214
tree = lookup_object(hexsha)
215
if base_hexsha is None:
217
old_path = None # Newly appeared here
219
base_tree = lookup_object(base_hexsha)
220
old_path = path # Renames aren't supported yet
221
if base_tree is None or type(base_tree) is not Tree:
222
ie.revision = revision_id
223
invdelta.append((old_path, path, ie.file_id, ie))
224
texts.insert_record_stream([ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
225
# Remember for next time
226
existing_children = set()
228
for child_mode, name, child_hexsha in tree.entries():
229
existing_children.add(name)
230
child_path = posixpath.join(path, name)
231
if type(base_tree) is Tree:
233
child_base_mode, child_base_hexsha = base_tree[name]
235
child_base_hexsha = None
238
child_base_hexsha = None
240
if stat.S_ISDIR(child_mode):
241
subinvdelta, grandchildmodes = import_git_tree(
242
texts, mapping, child_path, name,
243
(child_base_hexsha, child_hexsha),
244
base_inv, file_id, revision_id, parent_invs, lookup_object,
245
(child_base_mode, child_mode), store_updater,
246
allow_submodules=allow_submodules)
247
elif S_ISGITLINK(child_mode): # submodule
248
if not allow_submodules:
249
raise SubmodulesRequireSubtrees()
250
subinvdelta, grandchildmodes = import_git_submodule(
251
texts, mapping, child_path, name,
252
(child_base_hexsha, child_hexsha),
253
base_inv, file_id, revision_id, parent_invs, lookup_object,
254
(child_base_mode, child_mode), store_updater)
256
subinvdelta = import_git_blob(texts, mapping,
257
child_path, name, (child_base_hexsha, child_hexsha),
258
base_inv, file_id, revision_id, parent_invs, lookup_object,
259
(child_base_mode, child_mode), store_updater)
261
child_modes.update(grandchildmodes)
262
invdelta.extend(subinvdelta)
263
if child_mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
264
stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
265
child_modes[child_path] = child_mode
266
# Remove any children that have disappeared
267
if base_tree is not None and type(base_tree) is Tree:
268
invdelta.extend(remove_disappeared_children(base_inv, old_path,
269
base_tree, existing_children, lookup_object))
270
store_updater.add_object(tree, ie)
271
return invdelta, child_modes
274
def import_git_commit(repo, mapping, head, lookup_object,
275
target_git_object_retriever, parent_invs_cache):
276
o = lookup_object(head)
277
rev = mapping.import_commit(o)
278
# We have to do this here, since we have to walk the tree and
279
# we need to make sure to import the blobs / trees with the right
280
# path; this may involve adding them more than once.
281
parent_invs = parent_invs_cache.get_inventories(rev.parent_ids)
282
if parent_invs == []:
283
base_inv = Inventory(root_id=None)
287
base_inv = parent_invs[0]
288
base_tree = lookup_object(o.parents[0]).tree
289
base_mode = stat.S_IFDIR
290
store_updater = target_git_object_retriever._get_updater(rev)
291
store_updater.add_object(o, None)
292
inv_delta, unusual_modes = import_git_tree(repo.texts,
293
mapping, "", u"", (base_tree, o.tree), base_inv,
294
None, rev.revision_id, parent_invs, lookup_object,
295
(base_mode, stat.S_IFDIR), store_updater,
296
allow_submodules=getattr(repo._format, "supports_tree_reference", False))
297
store_updater.finish()
298
if unusual_modes != {}:
299
for path, mode in unusual_modes.iteritems():
300
warn_unusual_mode(rev.foreign_revid, path, mode)
301
mapping.import_unusual_file_modes(rev, unusual_modes)
303
basis_id = rev.parent_ids[0]
305
basis_id = NULL_REVISION
307
rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
308
inv_delta, rev.revision_id, rev.parent_ids,
310
parent_invs_cache.add(rev.revision_id, inv)
311
repo.add_revision(rev.revision_id, rev)
312
if "verify" in debug.debug_flags:
313
new_unusual_modes = mapping.export_unusual_file_modes(rev)
314
if new_unusual_modes != unusual_modes:
315
raise AssertionError("unusual modes don't match: %r != %r" % (unusual_modes, new_unusual_modes))
316
objs = inventory_to_tree_and_blobs(inv, repo.texts, mapping, unusual_modes)
317
for newsha1, newobj, path in objs:
318
assert path is not None
322
(oldmode, oldsha1) = tree_lookup_path(lookup_object, o.tree, path)
323
if oldsha1 != newsha1:
324
raise AssertionError("%r != %r in %s" % (oldsha1, newsha1, path))
327
def import_git_objects(repo, mapping, object_iter,
328
target_git_object_retriever, heads, pb=None, limit=None):
105
text_revision = inv.revision_id
106
repo.texts.add_lines((file_id, text_revision),
107
[(file_id, p[file_id].revision) for p in parent_invs if file_id in p],
109
ie = inv.add_path(path, "directory", file_id)
110
ie.revision = text_revision
111
for mode, name, hexsha in tree.entries():
112
entry_kind = (mode & 0700000) / 0100000
113
basename = name.decode("utf-8")
117
child_path = urlutils.join(path, name)
119
tree = lookup_object(hexsha)
120
import_git_tree(repo, mapping, child_path, tree, inv, parent_invs, lookup_object)
121
elif entry_kind == 1:
122
blob = lookup_object(hexsha)
123
fs_mode = mode & 0777
124
import_git_blob(repo, mapping, child_path, blob, inv, parent_invs, bool(fs_mode & 0111))
126
raise AssertionError("Unknown blob kind, perms=%r." % (mode,))
129
def import_git_objects(repo, mapping, num_objects, object_iter, pb=None):
329
130
"""Import a set of git objects into a bzr repository.
331
:param repo: Target Bazaar repository
132
:param repo: Bazaar repository
332
133
:param mapping: Mapping to use
134
:param num_objects: Number of objects.
333
135
:param object_iter: Iterator over Git objects.
334
:return: Tuple with pack hints and last imported revision id
336
def lookup_object(sha):
338
return object_iter[sha]
340
return target_git_object_retriever[sha]
137
# TODO: a more (memory-)efficient implementation of this
139
for i, (o, _) in enumerate(object_iter):
141
pb.update("fetching objects", i, num_objects)
343
heads = list(set(heads))
344
parent_invs_cache = LRUInventoryCache(repo)
345
146
# Find and convert commit objects
348
pb.update("finding revisions to fetch", len(graph), None)
350
assert isinstance(head, str)
352
o = lookup_object(head)
147
for o in objects.itervalues():
355
148
if isinstance(o, Commit):
356
149
rev = mapping.import_commit(o)
357
if repo.has_revision(rev.revision_id):
359
squash_revision(repo, rev)
360
graph.append((o.id, o.parents))
361
heads.extend([p for p in o.parents if p not in checked])
362
elif isinstance(o, Tag):
363
if o.object[1] not in checked:
364
heads.append(o.object[1])
366
trace.warning("Unable to import head object %r" % o)
150
root_trees[rev.revision_id] = objects[o.tree]
151
revisions[rev.revision_id] = rev
152
graph.append((rev.revision_id, rev.parent_ids))
369
153
# Order the revisions
370
154
# Create the inventory objects
372
revision_ids = topo_sort(graph)
374
if limit is not None:
375
revision_ids = revision_ids[:limit]
377
for offset in range(0, len(revision_ids), batch_size):
378
target_git_object_retriever.start_write_group()
380
repo.start_write_group()
382
for i, head in enumerate(
383
revision_ids[offset:offset+batch_size]):
385
pb.update("fetching revisions", offset+i,
387
import_git_commit(repo, mapping, head, lookup_object,
388
target_git_object_retriever,
392
repo.abort_write_group()
395
hint = repo.commit_write_group()
397
pack_hints.extend(hint)
399
target_git_object_retriever.abort_write_group()
402
target_git_object_retriever.commit_write_group()
403
return pack_hints, last_imported
406
class InterGitRepository(InterRepository):
408
_matching_repo_format = GitRepositoryFormat()
155
for i, revid in enumerate(topo_sort(graph)):
157
pb.update("fetching revisions", i, len(graph))
158
root_tree = root_trees[revid]
159
rev = revisions[revid]
160
# We have to do this here, since we have to walk the tree and
161
# we need to make sure to import the blobs / trees with the right
162
# path; this may involve adding them more than once.
164
inv.revision_id = rev.revision_id
165
def lookup_object(sha):
168
return reconstruct_git_object(repo, mapping, sha)
169
parent_invs = [repo.get_inventory(r) for r in rev.parent_ids]
170
import_git_tree(repo, mapping, "", root_tree, inv, parent_invs,
172
repo.add_revision(rev.revision_id, rev, inv)
175
def reconstruct_git_commit(repo, rev):
    """Placeholder for rebuilding a Git commit object; not implemented.

    :param repo: repository argument (currently unused)
    :param rev: revision argument (currently unused)
    :raise NotImplementedError: always
    """
    # This is a module-level function, so there is no ``self`` in scope;
    # referencing it raised NameError before NotImplementedError could be
    # constructed. Name the function itself instead.
    raise NotImplementedError(reconstruct_git_commit)
179
def reconstruct_git_object(repo, mapping, sha):
181
revid = mapping.revision_id_foreign_to_bzr(sha)
183
rev = repo.get_revision(revid)
184
except NoSuchRevision:
187
return reconstruct_git_commit(rev)
191
raise KeyError("No such object %s" % sha)
194
class InterGitNonGitRepository(InterRepository):
196
_matching_repo_format = GitFormat()
411
199
def _get_repo_format_to_test():
415
203
"""See InterRepository.copy_content."""
416
204
self.fetch(revision_id, pb, find_ghosts=False)
418
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
          mapping=None, fetch_spec=None):
    """Fetch by delegating to ``fetch_refs`` with the same arguments.

    Every parameter is forwarded unchanged as a keyword argument. The
    value returned by ``fetch_refs`` is discarded, so this returns None.
    """
    forwarded = dict(
        revision_id=revision_id,
        pb=pb,
        find_ghosts=find_ghosts,
        mapping=mapping,
        fetch_spec=fetch_spec,
    )
    self.fetch_refs(**forwarded)
424
class InterGitNonGitRepository(InterGitRepository):
425
"""Base InterRepository that copies revisions from a Git into a non-Git
428
def fetch_refs(self, revision_id=None, pb=None, find_ghosts=False,
429
mapping=None, fetch_spec=None):
206
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
430
208
if mapping is None:
431
209
mapping = self.source.get_mapping()
432
if revision_id is not None:
433
interesting_heads = [revision_id]
434
elif fetch_spec is not None:
435
interesting_heads = fetch_spec.heads
437
interesting_heads = None
439
def determine_wants(refs):
441
if interesting_heads is None:
442
ret = [sha for (ref, sha) in refs.iteritems() if not ref.endswith("^{}")]
211
pb.update("git: %s" % text.rstrip("\r\n"), 0, 0)
212
def determine_wants(heads):
213
if revision_id is None:
444
ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
216
ret = [mapping.revision_id_bzr_to_foreign(revision_id)[0]]
445
217
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
446
(pack_hint, _) = self.fetch_objects(determine_wants, mapping, pb)
447
if pack_hint is not None and self.target._format.pack_compresses:
448
self.target.pack(hint=pack_hint)
449
if interesting_heads is not None:
450
present_interesting_heads = self.target.has_revisions(interesting_heads)
451
missing_interesting_heads = set(interesting_heads) - present_interesting_heads
452
if missing_interesting_heads:
453
raise AssertionError("Missing interesting heads: %r" % missing_interesting_heads)
457
_GIT_PROGRESS_RE = re.compile(r"(.*?): +(\d+)% \((\d+)/(\d+)\)")
458
def report_git_progress(pb, text):
459
text = text.rstrip("\r\n")
460
g = _GIT_PROGRESS_RE.match(text)
462
(text, pct, current, total) = g.groups()
463
pb.update(text, int(current), int(total))
465
pb.update(text, 0, 0)
468
class InterRemoteGitNonGitRepository(InterGitNonGitRepository):
469
"""InterRepository that copies revisions from a remote Git into a non-Git
472
def get_target_heads(self):
473
# FIXME: This should be more efficient
474
all_revs = self.target.all_revision_ids()
475
parent_map = self.target.get_parent_map(all_revs)
477
map(all_parents.update, parent_map.itervalues())
478
return set(all_revs) - all_parents
480
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
482
report_git_progress(pb, text)
483
store = BazaarObjectStore(self.target, mapping)
484
self.target.lock_write()
486
heads = self.get_target_heads()
487
graph_walker = store.get_graph_walker(
488
[store._lookup_revision_sha1(head) for head in heads])
491
def record_determine_wants(heads):
492
wants = determine_wants(heads)
493
recorded_wants.extend(wants)
498
create_pb = pb = ui.ui_factory.nested_progress_bar()
500
objects_iter = self.source.fetch_objects(
501
record_determine_wants, graph_walker,
502
store.get_raw, progress)
503
return import_git_objects(self.target, mapping,
504
objects_iter, store, recorded_wants, pb, limit)
512
def is_compatible(source, target):
    """Tell whether this inter-repository can handle the given pair.

    The result is truthy only when ``source`` is a RemoteGitRepository,
    ``target.supports_rich_root()`` is truthy and ``target`` is not a
    GitRepository.
    """
    # FIXME: Also check target uses VersionedFile
    source_is_remote_git = isinstance(source, RemoteGitRepository)
    return (source_is_remote_git and
            target.supports_rich_root() and
            not isinstance(target, GitRepository))
520
class InterLocalGitNonGitRepository(InterGitNonGitRepository):
521
"""InterRepository that copies revisions from a local Git into a non-Git
524
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
527
wants = determine_wants(self.source._git.get_refs())
218
graph_walker = BzrFetchGraphWalker(self.target, mapping)
530
221
create_pb = pb = ui.ui_factory.nested_progress_bar()
531
target_git_object_retriever = BazaarObjectStore(self.target, mapping)
533
223
self.target.lock_write()
535
return import_git_objects(self.target, mapping,
536
self.source._git.object_store,
537
target_git_object_retriever, wants, pb, limit)
225
self.target.start_write_group()
227
(num_objects, objects_iter) = \
228
self.source.fetch_objects(determine_wants,
229
graph_walker, progress)
230
import_git_objects(self.target, mapping, num_objects,
233
self.target.commit_write_group()
539
235
self.target.unlock()