13
15
# You should have received a copy of the GNU General Public License
14
16
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
19
"""Converters, etc for going between Bazaar and Git ids."""
19
from bzrlib import errors, foreign, urlutils
20
from bzrlib.inventory import ROOT_ID
21
from bzrlib.foreign import (
21
from __future__ import absolute_import
33
from ..foreign import (
38
from ..revision import (
49
from .roundtrip import (
56
# Mode bits for a regular file with 0o644 permissions.
DEFAULT_FILE_MODE = stat.S_IFREG | 0o644
HG_RENAME_SOURCE = b"HG:rename-source"
HG_EXTRA = b"HG:extra"

# This HG extra is used to indicate the commit that this commit was based on.
HG_EXTRA_AMEND_SOURCE = b"amend_source"

FILE_ID_PREFIX = b'git:'

ROOT_ID = b"TREE_ROOT"
69
class UnknownCommitExtra(errors.BzrError):
70
_fmt = "Unknown extra fields in %(object)r: %(fields)r."
72
def __init__(self, object, fields):
73
errors.BzrError.__init__(self)
75
self.fields = ",".join(fields)
78
class UnknownMercurialCommitExtra(errors.BzrError):
79
_fmt = "Unknown mercurial extra fields in %(object)r: %(fields)r."
81
def __init__(self, object, fields):
82
errors.BzrError.__init__(self)
84
self.fields = b",".join(fields)
27
87
def escape_file_id(file_id):
28
return file_id.replace('_', '__').replace(' ', '_s')
88
file_id = file_id.replace(b'_', b'__')
89
file_id = file_id.replace(b' ', b'_s')
90
file_id = file_id.replace(b'\x0c', b'_c')
31
94
def unescape_file_id(file_id):
32
return file_id.replace("_s", " ").replace("__", "_")
97
while i < len(file_id):
98
if file_id[i:i + 1] != b'_':
99
ret.append(file_id[i])
101
if file_id[i + 1:i + 2] == b'_':
103
elif file_id[i + 1:i + 2] == b's':
105
elif file_id[i + 1:i + 2] == b'c':
106
ret.append(b"\x0c"[0])
108
raise ValueError("unknown escape character %s" %
109
file_id[i + 1:i + 2])
115
def fix_person_identifier(text):
116
if b"<" not in text and b">" not in text:
120
if text.rindex(b">") < text.rindex(b"<"):
121
raise ValueError(text)
122
username, email = text.split(b"<", 2)[-2:]
123
email = email.split(b">", 1)[0]
124
if username.endswith(b" "):
125
username = username[:-1]
126
return b"%s <%s>" % (username, email)
129
def warn_escaped(commit, num_escaped):
    """Emit a warning that XML-invalid characters were escaped.

    :param commit: The commit the warning is about; interpolated into the
        message with ``%s``.
    :param num_escaped: Number of escaped characters; interpolated with
        ``%d``.
    """
    trace.warning("Escaped %d XML-invalid characters in %s. Will be unable "
                  "to regenerate the SHA map.", num_escaped, commit)
134
def warn_unusual_mode(commit, path, mode):
    """Record via ``trace.mutter`` that a file has an unusual mode.

    :param commit: The commit containing the file; interpolated with ``%s``.
    :param path: Path of the file; interpolated with ``%s``.
    :param mode: Integer file mode; rendered in octal (``%o``).
    """
    trace.mutter("Unusual file mode %o for %s in %s. Storing as revision "
                 "property. ", mode, path, commit)
35
139
class BzrGitMapping(foreign.VcsMapping):
36
140
"""Class that maps between Git and Bazaar semantics."""
37
141
experimental = False
143
BZR_DUMMY_FILE = None
145
def is_special_file(self, filename):
146
return (filename in (self.BZR_DUMMY_FILE, ))
39
148
def __init__(self):
40
super(BzrGitMapping, self).__init__(foreign_git)
149
super(BzrGitMapping, self).__init__(foreign_vcs_git)
42
151
def __eq__(self, other):
43
return type(self) == type(other) and self.revid_prefix == other.revid_prefix
152
return (type(self) == type(other)
153
and self.revid_prefix == other.revid_prefix)
46
156
def revision_id_foreign_to_bzr(cls, git_rev_id):
47
157
"""Convert a git revision id handle to a Bazaar revision id."""
48
return "%s:%s" % (cls.revid_prefix, git_rev_id)
158
from dulwich.protocol import ZERO_SHA
159
if git_rev_id == ZERO_SHA:
161
return b"%s:%s" % (cls.revid_prefix, git_rev_id)
51
164
def revision_id_bzr_to_foreign(cls, bzr_rev_id):
52
165
"""Convert a Bazaar revision id to a git revision id handle."""
53
if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
166
if not bzr_rev_id.startswith(b"%s:" % cls.revid_prefix):
54
167
raise errors.InvalidRevisionId(bzr_rev_id, cls)
55
return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
168
return bzr_rev_id[len(cls.revid_prefix) + 1:], cls()
57
170
def generate_file_id(self, path):
171
# Git paths are just bytestrings
172
# We must just hope they are valid UTF-8..
173
if isinstance(path, str):
174
path = path.encode("utf-8")
60
return escape_file_id(path.encode('utf-8'))
62
def import_commit(self, commit):
177
return FILE_ID_PREFIX + escape_file_id(path)
179
def parse_file_id(self, file_id):
180
if file_id == ROOT_ID:
182
if not file_id.startswith(FILE_ID_PREFIX):
184
return unescape_file_id(file_id[len(FILE_ID_PREFIX):]).decode('utf-8')
186
def revid_as_refname(self, revid):
187
if not isinstance(revid, bytes):
188
raise TypeError(revid)
189
revid = revid.decode('utf-8')
190
quoted_revid = urlutils.quote(revid)
191
return b"refs/bzr/" + quoted_revid.encode('utf-8')
193
def import_unusual_file_modes(self, rev, unusual_file_modes):
194
if unusual_file_modes:
195
ret = [(path, unusual_file_modes[path])
196
for path in sorted(unusual_file_modes.keys())]
197
rev.properties[u'file-modes'] = bencode.bencode(ret)
199
def export_unusual_file_modes(self, rev):
201
file_modes = rev.properties[u'file-modes']
205
return dict(bencode.bdecode(file_modes.encode("utf-8")))
207
def _generate_git_svn_metadata(self, rev, encoding):
209
git_svn_id = rev.properties[u"git-svn-id"]
213
return "\ngit-svn-id: %s\n" % git_svn_id.encode(encoding)
215
def _generate_hg_message_tail(self, rev):
219
for name in rev.properties:
220
if name == u'hg:extra:branch':
221
branch = rev.properties[u'hg:extra:branch']
222
elif name.startswith(u'hg:extra'):
223
extra[name[len(u'hg:extra:'):]] = base64.b64decode(
224
rev.properties[name])
225
elif name == u'hg:renames':
226
renames = bencode.bdecode(base64.b64decode(
227
rev.properties[u'hg:renames']))
228
# TODO: Export other properties as 'bzr:' extras?
229
ret = format_hg_metadata(renames, branch, extra)
230
if not isinstance(ret, bytes):
234
def _extract_git_svn_metadata(self, rev, message):
235
lines = message.split("\n")
236
if not (lines[-1] == "" and len(lines) >= 2 and
237
lines[-2].startswith("git-svn-id:")):
239
git_svn_id = lines[-2].split(": ", 1)[1]
240
rev.properties[u'git-svn-id'] = git_svn_id
241
(url, rev, uuid) = parse_git_svn_id(git_svn_id)
242
# FIXME: Convert this to converted-from property somehow..
243
return "\n".join(lines[:-2])
245
def _extract_hg_metadata(self, rev, message):
246
(message, renames, branch, extra) = extract_hg_metadata(message)
247
if branch is not None:
248
rev.properties[u'hg:extra:branch'] = branch
249
for name, value in extra.items():
250
rev.properties[u'hg:extra:' + name] = base64.b64encode(value)
252
rev.properties[u'hg:renames'] = base64.b64encode(bencode.bencode(
253
[(new, old) for (old, new) in renames.items()]))
256
def _extract_bzr_metadata(self, rev, message):
257
(message, metadata) = extract_bzr_metadata(message)
258
return message, metadata
260
def _decode_commit_message(self, rev, message, encoding):
261
return message.decode(encoding), CommitSupplement()
263
def _encode_commit_message(self, rev, message, encoding):
264
return message.encode(encoding)
266
def export_commit(self, rev, tree_sha, parent_lookup, lossy,
268
"""Turn a Bazaar revision in to a Git commit
270
:param tree_sha: Tree sha for the commit
271
:param parent_lookup: Function for looking up the GIT sha equiv of a
273
:param lossy: Whether to store roundtripping information.
274
:param verifiers: Verifiers info
275
:return dulwich.objects.Commit represent the revision:
277
from dulwich.objects import Commit, Tag
279
commit.tree = tree_sha
281
metadata = CommitSupplement()
282
metadata.verifiers = verifiers
286
for p in rev.parent_ids:
288
git_p = parent_lookup(p)
291
if metadata is not None:
292
metadata.explicit_parent_ids = rev.parent_ids
293
if git_p is not None:
295
raise AssertionError("unexpected length for %r" % git_p)
296
parents.append(git_p)
297
commit.parents = parents
299
encoding = rev.properties[u'git-explicit-encoding']
301
encoding = rev.properties.get(u'git-implicit-encoding', 'utf-8')
303
commit.encoding = rev.properties[u'git-explicit-encoding'].encode(
307
commit.committer = fix_person_identifier(rev.committer.encode(
309
commit.author = fix_person_identifier(
310
rev.get_apparent_authors()[0].encode(encoding))
311
# TODO(jelmer): Don't use this hack.
312
long = getattr(__builtins__, 'long', int)
313
commit.commit_time = long(rev.timestamp)
314
if u'author-timestamp' in rev.properties:
315
commit.author_time = long(rev.properties[u'author-timestamp'])
317
commit.author_time = commit.commit_time
318
commit._commit_timezone_neg_utc = (
319
u"commit-timezone-neg-utc" in rev.properties)
320
commit.commit_timezone = rev.timezone
321
commit._author_timezone_neg_utc = (
322
u"author-timezone-neg-utc" in rev.properties)
323
if u'author-timezone' in rev.properties:
324
commit.author_timezone = int(rev.properties[u'author-timezone'])
326
commit.author_timezone = commit.commit_timezone
327
if u'git-gpg-signature' in rev.properties:
328
commit.gpgsig = rev.properties[u'git-gpg-signature'].encode(
330
commit.message = self._encode_commit_message(rev, rev.message,
332
if not isinstance(commit.message, bytes):
333
raise TypeError(commit.message)
334
if metadata is not None:
336
mapping_registry.parse_revision_id(rev.revision_id)
337
except errors.InvalidRevisionId:
338
metadata.revision_id = rev.revision_id
339
mapping_properties = set(
340
[u'author', u'author-timezone', u'author-timezone-neg-utc',
341
u'commit-timezone-neg-utc', u'git-implicit-encoding',
342
u'git-gpg-signature', u'git-explicit-encoding',
343
u'author-timestamp', u'file-modes'])
344
for k, v in rev.properties.items():
345
if k not in mapping_properties:
346
metadata.properties[k] = v
347
if not lossy and metadata:
348
if self.roundtripping:
349
commit.message = inject_bzr_metadata(commit.message, metadata,
353
None, None, self, revision_id=rev.revision_id)
354
if not isinstance(commit.message, bytes):
355
raise TypeError(commit.message)
357
propname = u'git-mergetag-0'
358
while propname in rev.properties:
359
commit.mergetag.append(Tag.from_string(rev.properties[propname]))
361
propname = u'git-mergetag-%d' % i
362
if u'git-extra' in rev.properties:
365
for l in rev.properties[u'git-extra'].splitlines()])
368
def get_revision_id(self, commit):
370
encoding = commit.encoding.decode('ascii')
374
message, metadata = self._decode_commit_message(
375
None, commit.message, encoding)
376
except UnicodeDecodeError:
379
if metadata.revision_id:
380
return metadata.revision_id
381
return self.revision_id_foreign_to_bzr(commit.id)
383
def import_commit(self, commit, lookup_parent_revid, strict=True):
63
384
"""Convert a git commit to a bzr revision.
65
:return: a `bzrlib.revision.Revision` object.
386
:return: a `breezy.revision.Revision` object, foreign revid and a
67
389
if commit is None:
68
390
raise AssertionError("Commit object can't be None")
69
rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
rev.message = commit.message.decode("utf-8", "replace")
72
rev.committer = str(commit.committer).decode("utf-8", "replace")
73
if commit.committer != commit.author:
74
rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
391
rev = ForeignRevision(commit.id, self,
392
self.revision_id_foreign_to_bzr(commit.id))
393
rev.git_metadata = None
395
def decode_using_encoding(rev, commit, encoding):
396
rev.committer = commit.committer.decode(encoding)
397
if commit.committer != commit.author:
398
rev.properties[u'author'] = commit.author.decode(encoding)
399
rev.message, rev.git_metadata = self._decode_commit_message(
400
rev, commit.message, encoding)
401
if commit.encoding is not None:
402
rev.properties[u'git-explicit-encoding'] = commit.encoding.decode(
404
decode_using_encoding(rev, commit, commit.encoding.decode('ascii'))
406
for encoding in ('utf-8', 'latin1'):
408
decode_using_encoding(rev, commit, encoding)
409
except UnicodeDecodeError:
412
if encoding != 'utf-8':
413
rev.properties[u'git-implicit-encoding'] = encoding
415
if commit.commit_time != commit.author_time:
416
rev.properties[u'author-timestamp'] = str(commit.author_time)
417
if commit.commit_timezone != commit.author_timezone:
418
rev.properties[u'author-timezone'] = "%d" % commit.author_timezone
419
if commit._author_timezone_neg_utc:
420
rev.properties[u'author-timezone-neg-utc'] = ""
421
if commit._commit_timezone_neg_utc:
422
rev.properties[u'commit-timezone-neg-utc'] = ""
424
rev.properties[u'git-gpg-signature'] = commit.gpgsig.decode(
427
for i, tag in enumerate(commit.mergetag):
428
rev.properties[u'git-mergetag-%d' % i] = tag.as_raw_string()
75
429
rev.timestamp = commit.commit_time
430
rev.timezone = commit.commit_timezone
431
rev.parent_ids = None
432
if rev.git_metadata is not None:
433
md = rev.git_metadata
434
roundtrip_revid = md.revision_id
435
if md.explicit_parent_ids:
436
rev.parent_ids = md.explicit_parent_ids
437
rev.properties.update(md.properties)
438
verifiers = md.verifiers
440
roundtrip_revid = None
442
if rev.parent_ids is None:
444
for p in commit.parents:
446
parents.append(lookup_parent_revid(p))
448
parents.append(self.revision_id_foreign_to_bzr(p))
449
rev.parent_ids = list(parents)
450
unknown_extra_fields = []
452
for k, v in commit.extra:
453
if k == HG_RENAME_SOURCE:
454
extra_lines.append(k + b' ' + v + b'\n')
456
hgk, hgv = v.split(b':', 1)
457
if hgk not in (HG_EXTRA_AMEND_SOURCE, ) and strict:
458
raise UnknownMercurialCommitExtra(commit, [hgk])
459
extra_lines.append(k + b' ' + v + b'\n')
461
unknown_extra_fields.append(k)
462
if unknown_extra_fields and strict:
463
raise UnknownCommitExtra(
465
[f.decode('ascii', 'replace') for f in unknown_extra_fields])
467
rev.properties[u'git-extra'] = b''.join(extra_lines)
468
return rev, roundtrip_revid, verifiers
80
471
class BzrGitMappingv1(BzrGitMapping):
81
revid_prefix = 'git-v1'
472
revid_prefix = b'git-v1'
82
473
experimental = False
476
return self.revid_prefix
85
479
class BzrGitMappingExperimental(BzrGitMappingv1):
86
revid_prefix = 'git-experimental'
480
revid_prefix = b'git-experimental'
87
481
experimental = True
482
roundtripping = False
484
BZR_DUMMY_FILE = '.bzrdummy'
486
def _decode_commit_message(self, rev, message, encoding):
489
message = self._extract_hg_metadata(rev, message)
490
message = self._extract_git_svn_metadata(rev, message)
491
message, metadata = self._extract_bzr_metadata(rev, message)
492
return message.decode(encoding), metadata
494
def _encode_commit_message(self, rev, message, encoding):
495
ret = message.encode(encoding)
496
ret += self._generate_hg_message_tail(rev)
497
ret += self._generate_git_svn_metadata(rev, encoding)
500
def import_commit(self, commit, lookup_parent_revid, strict=True):
501
rev, roundtrip_revid, verifiers = super(
502
BzrGitMappingExperimental, self).import_commit(
503
commit, lookup_parent_revid, strict)
504
rev.properties[u'converted_revision'] = "git %s\n" % commit.id
505
return rev, roundtrip_revid, verifiers
90
508
class GitMappingRegistry(VcsMappingRegistry):
509
"""Registry with available git mappings."""
92
511
def revision_id_bzr_to_foreign(self, bzr_revid):
93
if not bzr_revid.startswith("git-"):
512
if bzr_revid == NULL_REVISION:
513
from dulwich.protocol import ZERO_SHA
514
return ZERO_SHA, None
515
if not bzr_revid.startswith(b"git-"):
94
516
raise errors.InvalidRevisionId(bzr_revid, None)
95
(mapping_version, git_sha) = bzr_revid.split(":", 1)
517
(mapping_version, git_sha) = bzr_revid.split(b":", 1)
96
518
mapping = self.get(mapping_version)
97
519
return mapping.revision_id_bzr_to_foreign(bzr_revid)
102
524
mapping_registry = GitMappingRegistry()
103
mapping_registry.register_lazy('git-v1', "bzrlib.plugins.git.mapping",
105
mapping_registry.register_lazy('git-experimental', "bzrlib.plugins.git.mapping",
106
"BzrGitMappingExperimental")
525
mapping_registry.register_lazy(b'git-v1', __name__,
527
mapping_registry.register_lazy(b'git-experimental',
528
__name__, "BzrGitMappingExperimental")
529
# Uncomment the next line to enable the experimental bzr-git mappings.
530
# This will make sure all bzr metadata is pushed into git, allowing for
531
# full roundtripping later.
532
# NOTE: THIS IS EXPERIMENTAL. IT MAY EAT YOUR DATA OR CORRUPT
533
# YOUR BZR OR GIT REPOSITORIES. USE WITH CARE.
534
# mapping_registry.set_default('git-experimental')
535
mapping_registry.set_default(b'git-v1')
109
538
class ForeignGit(ForeignVcs):
539
"""The Git Stupid Content Tracker"""
542
def branch_format(self):
543
from .branch import LocalGitBranchFormat
544
return LocalGitBranchFormat()
547
def repository_format(self):
548
from .repository import GitRepositoryFormat
549
return GitRepositoryFormat()
112
551
def __init__(self):
113
552
super(ForeignGit, self).__init__(mapping_registry)
553
self.abbreviation = "git"
556
def serialize_foreign_revid(self, foreign_revid):
116
560
def show_foreign_revid(cls, foreign_revid):
117
return { "git commit": foreign_revid }
120
foreign_git = ForeignGit()
121
default_mapping = BzrGitMappingv1()
124
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
from dulwich.objects import Tree, Blob
126
from bzrlib.inventory import InventoryDirectory, InventoryFile
132
inv = repo.get_inventory(revision_id)
134
# stack contains the set of trees that we haven't
135
# finished constructing
137
for path, entry in inv.iter_entries():
138
while stack and not path.startswith(cur):
140
sha = tree.sha().hexdigest()
142
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
cur, tree = stack.pop()
146
if type(entry) == InventoryDirectory:
147
stack.append((cur, tree))
151
if type(entry) == InventoryFile:
152
#FIXME: We can make potentially make this Lazy to avoid shaing lots of stuff
153
# and having all these objects in memory at once
155
_, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
sha = blob.sha().hexdigest()
157
yield sha, blob, path
159
name = urlutils.basename(path).encode("utf-8")
160
mode = stat.S_IFREG | 0644
163
tree.add(mode, name, sha)
165
while len(stack) > 1:
167
sha = tree.sha().hexdigest()
169
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
cur, tree = stack.pop()
174
yield tree.sha().hexdigest(), tree, cur
177
def revision_to_commit(rev, tree_sha, parent_lookup):
178
"""Turn a Bazaar revision in to a Git commit
180
:param tree_sha: Tree sha for the commit
181
:param parent_lookup: Function for looking up the GIT sha equiv of a bzr revision
182
:return dulwich.objects.Commit represent the revision:
184
from dulwich.objects import Commit
186
commit._tree = tree_sha
187
for p in rev.parent_ids:
188
git_p = parent_lookup(p)
189
if git_p is not None:
190
commit._parents.append(git_p)
191
commit._message = rev.message.encode("utf-8")
192
commit._committer = rev.committer.encode("utf-8")
193
commit._author = rev.get_apparent_author().encode("utf-8")
194
commit._commit_time = long(rev.timestamp)
561
return {"git commit": foreign_revid.decode('utf-8')}
564
foreign_vcs_git = ForeignGit()
565
default_mapping = mapping_registry.get_default()()
568
def symlink_to_blob(symlink_target):
569
from dulwich.objects import Blob
571
if isinstance(symlink_target, str):
572
symlink_target = symlink_target.encode('utf-8')
573
blob.data = symlink_target
577
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Integer file mode.
    :return: True if any of the execute bits (``0o111``, i.e. user, group
        or other) is set, False otherwise.
    """
    return bool(mode & 0o111)
583
"""Determine the Bazaar inventory kind based on Unix file mode."""
586
entry_kind = (mode & 0o700000) / 0o100000
589
elif entry_kind == 1:
590
file_kind = (mode & 0o70000) / 0o10000
596
return 'tree-reference'
598
raise AssertionError(
599
"Unknown file kind %d, perms=%o." % (file_kind, mode,))
601
raise AssertionError(
602
"Unknown kind, perms=%r." % (mode,))
605
def object_mode(kind, executable):
606
if kind == 'directory':
608
elif kind == 'symlink':
614
mode = stat.S_IFREG | 0o644
618
elif kind == 'tree-reference':
619
from dulwich.objects import S_IFGITLINK
625
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Object with a ``kind`` attribute and, optionally, an
        ``executable`` attribute (treated as False when absent).
    :return: Whatever ``object_mode`` returns for that kind/executable pair.
    """
    return object_mode(entry.kind, getattr(entry, 'executable', False))
630
def extract_unusual_modes(rev):
632
foreign_revid, mapping = mapping_registry.parse_revision_id(
634
except errors.InvalidRevisionId:
637
return mapping.export_unusual_file_modes(rev)
640
def parse_git_svn_id(text):
    """Split a git-svn-id value into its URL, revision and UUID parts.

    The text is expected to look like ``<url>@<revnum> <uuid>``.  Both
    splits are taken from the right, so a URL that itself contains ``@``
    or spaces is kept intact.

    :param text: The git-svn-id value as a string.
    :return: Tuple ``(full_url, revnum, uuid)`` with ``revnum`` as an int.
    :raises ValueError: If the text has no space or no ``@`` separator, or
        if the revision part is not an integer.
    """
    (head, uuid) = text.rsplit(" ", 1)
    (full_url, rev) = head.rsplit("@", 1)
    return (full_url, int(rev), uuid)
646
def needs_roundtripping(repo, revid):
648
mapping_registry.parse_revision_id(revid)
649
except errors.InvalidRevisionId: