125
74
def generate_file_id(self, path):
126
75
# Git paths are just bytestrings
127
76
# We must just hope they are valid UTF-8..
77
assert isinstance(path, str)
130
if type(path) is unicode:
131
path = path.encode("utf-8")
132
80
return escape_file_id(path)
134
def is_control_file(self, path):
    """Tell whether ``path`` names one of this mapping's control files.

    :param path: Path to check.
    :return: True if ``path`` equals ``self.BZR_FILE_IDS_FILE`` or
        ``self.BZR_DUMMY_FILE``, False otherwise.
    """
    return path in (self.BZR_FILE_IDS_FILE, self.BZR_DUMMY_FILE)
137
82
def parse_file_id(self, file_id):
138
83
if file_id == ROOT_ID:
140
85
return unescape_file_id(file_id)
142
def revid_as_refname(self, revid):
144
return "refs/bzr/%s" % urllib.quote(revid)
146
def import_unusual_file_modes(self, rev, unusual_file_modes):
    """Store unusual file modes in the ``file-modes`` revision property.

    :param rev: Revision object; its ``properties`` mapping is updated
        in place.
    :param unusual_file_modes: Mapping of path to mode. When it is
        empty (or otherwise falsy) the revision is left untouched.
    """
    if not unusual_file_modes:
        return
    # Entries are emitted as (path, mode) pairs in sorted path order.
    ret = [(path, unusual_file_modes[path])
           for path in sorted(unusual_file_modes)]
    rev.properties['file-modes'] = bencode.bencode(ret)
152
def export_unusual_file_modes(self, rev):
154
file_modes = rev.properties['file-modes']
158
return dict([(self.generate_file_id(path), mode) for (path, mode) in bencode.bdecode(file_modes.encode("utf-8"))])
160
def _generate_git_svn_metadata(self, rev, encoding):
162
git_svn_id = rev.properties["git-svn-id"]
166
return "\ngit-svn-id: %s\n" % git_svn_id.encode(encoding)
168
def _generate_hg_message_tail(self, rev):
172
for name in rev.properties:
173
if name == 'hg:extra:branch':
174
branch = rev.properties['hg:extra:branch']
175
elif name.startswith('hg:extra'):
176
extra[name[len('hg:extra:'):]] = base64.b64decode(
177
rev.properties[name])
178
elif name == 'hg:renames':
179
renames = bencode.bdecode(base64.b64decode(
180
rev.properties['hg:renames']))
181
# TODO: Export other properties as 'bzr:' extras?
182
ret = format_hg_metadata(renames, branch, extra)
183
assert isinstance(ret, str)
186
def _extract_git_svn_metadata(self, rev, message):
187
lines = message.split("\n")
188
if not (lines[-1] == "" and len(lines) >= 2 and lines[-2].startswith("git-svn-id:")):
190
git_svn_id = lines[-2].split(": ", 1)[1]
191
rev.properties['git-svn-id'] = git_svn_id
192
(url, rev, uuid) = parse_git_svn_id(git_svn_id)
193
# FIXME: Convert this to converted-from property somehow..
194
ret = "\n".join(lines[:-2])
195
assert isinstance(ret, str)
198
def _extract_hg_metadata(self, rev, message):
199
(message, renames, branch, extra) = extract_hg_metadata(message)
200
if branch is not None:
201
rev.properties['hg:extra:branch'] = branch
202
for name, value in extra.iteritems():
203
rev.properties['hg:extra:' + name] = base64.b64encode(value)
205
rev.properties['hg:renames'] = base64.b64encode(bencode.bencode(
206
[(new, old) for (old, new) in renames.iteritems()]))
209
def _extract_bzr_metadata(self, rev, message):
    """Split Bazaar metadata off a commit message.

    Delegates to ``extract_bzr_metadata``; ``rev`` is accepted but not
    used here.

    :param rev: Revision being imported (unused).
    :param message: Commit message to process.
    :return: Tuple of (message, metadata) as produced by
        ``extract_bzr_metadata``.
    """
    (message, metadata) = extract_bzr_metadata(message)
    return message, metadata
213
def _decode_commit_message(self, rev, message, encoding):
    """Decode a raw commit message without extracting any metadata.

    :param rev: Revision being imported (unused here).
    :param message: Encoded commit message.
    :param encoding: Name of the encoding to decode ``message`` with.
    :return: Tuple of (decoded message, a fresh
        ``BzrGitRevisionMetadata`` instance).
    """
    return message.decode(encoding), BzrGitRevisionMetadata()
216
def _encode_commit_message(self, rev, message, encoding):
217
return message.encode(encoding)
219
def export_fileid_map(self, fileid_map):
220
"""Export a file id map to a fileid map.
222
:param fileid_map: File id map, mapping paths to file ids
223
:return: A Git blob object
225
from dulwich.objects import Blob
227
b.set_raw_chunks(serialize_fileid_map(fileid_map))
230
def export_commit(self, rev, tree_sha, parent_lookup, roundtrip,
232
"""Turn a Bazaar revision in to a Git commit
234
:param tree_sha: Tree sha for the commit
235
:param parent_lookup: Function for looking up the GIT sha equiv of a
237
:param roundtrip: Whether to store roundtripping information.
238
:param verifiers: Verifiers info
239
:return dulwich.objects.Commit represent the revision:
241
from dulwich.objects import Commit
243
commit.tree = tree_sha
245
metadata = BzrGitRevisionMetadata()
246
metadata.verifiers = verifiers
250
for p in rev.parent_ids:
252
git_p = parent_lookup(p)
255
if metadata is not None:
256
metadata.explicit_parent_ids = rev.parent_ids
257
if git_p is not None:
258
assert len(git_p) == 40, "unexpected length for %r" % git_p
259
parents.append(git_p)
260
commit.parents = parents
262
encoding = rev.properties['git-explicit-encoding']
264
encoding = rev.properties.get('git-implicit-encoding', 'utf-8')
265
commit.encoding = rev.properties.get('git-explicit-encoding')
266
commit.committer = fix_person_identifier(rev.committer.encode(
268
commit.author = fix_person_identifier(
269
rev.get_apparent_authors()[0].encode(encoding))
270
commit.commit_time = long(rev.timestamp)
271
if 'author-timestamp' in rev.properties:
272
commit.author_time = long(rev.properties['author-timestamp'])
274
commit.author_time = commit.commit_time
275
commit._commit_timezone_neg_utc = "commit-timezone-neg-utc" in rev.properties
276
commit.commit_timezone = rev.timezone
277
commit._author_timezone_neg_utc = "author-timezone-neg-utc" in rev.properties
278
if 'author-timezone' in rev.properties:
279
commit.author_timezone = int(rev.properties['author-timezone'])
281
commit.author_timezone = commit.commit_timezone
282
commit.message = self._encode_commit_message(rev, rev.message,
284
assert type(commit.message) == str
285
if metadata is not None:
287
mapping_registry.parse_revision_id(rev.revision_id)
288
except errors.InvalidRevisionId:
289
metadata.revision_id = rev.revision_id
290
mapping_properties = set(
291
['author', 'author-timezone', 'author-timezone-neg-utc',
292
'commit-timezone-neg-utc', 'git-implicit-encoding',
293
'git-explicit-encoding', 'author-timestamp', 'file-modes'])
294
for k, v in rev.properties.iteritems():
295
if not k in mapping_properties:
296
metadata.properties[k] = v
297
if self.roundtripping:
298
commit.message = inject_bzr_metadata(commit.message, metadata,
300
assert type(commit.message) == str
303
def import_fileid_map(self, blob):
    """Convert a git file id map blob.

    :param blob: Git blob object with fileid map
    :return: Dictionary mapping paths to file ids
    """
    # The blob's raw contents are handed to deserialize_fileid_map as-is.
    return deserialize_fileid_map(blob.data)
311
def import_commit(self, commit, lookup_parent_revid):
87
def import_commit(self, commit):
312
88
"""Convert a git commit to a bzr revision.
314
:return: a `bzrlib.revision.Revision` object, foreign revid and a
90
:return: a `bzrlib.revision.Revision` object.
317
92
if commit is None:
318
93
raise AssertionError("Commit object can't be None")
319
rev = ForeignRevision(commit.id, self,
320
self.revision_id_foreign_to_bzr(commit.id))
321
rev.parent_ids = tuple([lookup_parent_revid(p) for p in commit.parents])
322
rev.git_metadata = None
323
def decode_using_encoding(rev, commit, encoding):
324
rev.committer = str(commit.committer).decode(encoding)
325
if commit.committer != commit.author:
326
rev.properties['author'] = str(commit.author).decode(encoding)
327
rev.message, rev.git_metadata = self._decode_commit_message(
328
rev, commit.message, encoding)
329
if commit.encoding is not None:
330
rev.properties['git-explicit-encoding'] = commit.encoding
331
decode_using_encoding(rev, commit, commit.encoding)
333
for encoding in ('utf-8', 'latin1'):
335
decode_using_encoding(rev, commit, encoding)
336
except UnicodeDecodeError:
339
if encoding != 'utf-8':
340
rev.properties['git-implicit-encoding'] = encoding
94
rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
95
rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
96
rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
97
rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
98
if commit.committer != commit.author:
99
rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
342
101
if commit.commit_time != commit.author_time:
343
102
rev.properties['author-timestamp'] = str(commit.author_time)
344
if commit.commit_timezone != commit.author_timezone:
345
rev.properties['author-timezone'] = "%d" % commit.author_timezone
346
if commit._author_timezone_neg_utc:
347
rev.properties['author-timezone-neg-utc'] = ""
348
if commit._commit_timezone_neg_utc:
349
rev.properties['commit-timezone-neg-utc'] = ""
350
103
rev.timestamp = commit.commit_time
351
rev.timezone = commit.commit_timezone
352
if rev.git_metadata is not None:
353
md = rev.git_metadata
354
roundtrip_revid = md.revision_id
355
if md.explicit_parent_ids:
356
rev.parent_ids = md.explicit_parent_ids
357
rev.properties.update(md.properties)
358
verifiers = md.verifiers
360
roundtrip_revid = None
362
return rev, roundtrip_revid, verifiers
364
def get_fileid_map(self, lookup_object, tree_sha):
365
"""Obtain a fileid map for a particular tree.
367
:param lookup_object: Function for looking up an object
368
:param tree_sha: SHA of the root tree
369
:return: GitFileIdMap instance
372
file_id_map_sha = lookup_object(tree_sha)[self.BZR_FILE_IDS_FILE][1]
376
file_ids = self.import_fileid_map(lookup_object(file_id_map_sha))
377
return GitFileIdMap(file_ids, self)
380
108
class BzrGitMappingv1(BzrGitMapping):
381
109
revid_prefix = 'git-v1'
382
110
experimental = False
385
return self.revid_prefix
388
113
class BzrGitMappingExperimental(BzrGitMappingv1):
389
114
revid_prefix = 'git-experimental'
390
115
experimental = True
393
BZR_FILE_IDS_FILE = '.bzrfileids'
395
BZR_DUMMY_FILE = '.bzrdummy'
397
def _decode_commit_message(self, rev, message, encoding):
    """Strip embedded metadata from a commit message, then decode it.

    The message is passed, in order, through the hg, git-svn and bzr
    metadata extractors before being decoded.

    :param rev: Revision being imported; passed on to each extractor.
    :param message: Encoded commit message.
    :param encoding: Name of the encoding to decode the remainder with.
    :return: Tuple of (decoded message, metadata returned by
        ``_extract_bzr_metadata``).
    """
    message = self._extract_hg_metadata(rev, message)
    message = self._extract_git_svn_metadata(rev, message)
    message, metadata = self._extract_bzr_metadata(rev, message)
    return message.decode(encoding), metadata
403
def _encode_commit_message(self, rev, message, encoding):
404
ret = message.encode(encoding)
405
ret += self._generate_hg_message_tail(rev)
406
ret += self._generate_git_svn_metadata(rev, encoding)
409
def import_commit(self, commit, lookup_parent_revid):
    """Import a commit and tag the revision with its Git origin.

    Calls the parent class implementation, then sets the
    ``converted_revision`` property to ``"git <commit.id>\\n"``.

    :param commit: Git commit object to convert.
    :param lookup_parent_revid: Passed through to the parent
        implementation.
    :return: Tuple of (rev, roundtrip_revid, verifiers) from the parent
        implementation, with ``rev`` updated as described.
    """
    rev, roundtrip_revid, verifiers = super(
        BzrGitMappingExperimental, self).import_commit(
            commit, lookup_parent_revid)
    rev.properties['converted_revision'] = "git %s\n" % commit.id
    return rev, roundtrip_revid, verifiers
415
118
class GitMappingRegistry(VcsMappingRegistry):
416
"""Registry with available git mappings."""
418
120
def revision_id_bzr_to_foreign(self, bzr_revid):
419
if bzr_revid == NULL_REVISION:
420
from dulwich.protocol import ZERO_SHA
421
return ZERO_SHA, None
422
121
if not bzr_revid.startswith("git-"):
423
122
raise errors.InvalidRevisionId(bzr_revid, None)
424
123
(mapping_version, git_sha) = bzr_revid.split(":", 1)
465
148
foreign_git = ForeignGit()
466
default_mapping = mapping_registry.get_default()()
149
default_mapping = BzrGitMappingv1()
152
def text_to_blob(text):
153
from dulwich.objects import Blob
469
159
def symlink_to_blob(entry):
470
160
from dulwich.objects import Blob
472
symlink_target = entry.symlink_target
473
if type(symlink_target) == unicode:
474
symlink_target = symlink_target.encode('utf-8')
475
blob.data = symlink_target
162
blob._text = entry.symlink_target
479
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Integer file mode.
    :return: True if any of the user, group or other execute bits
        (octal 111) is set, False otherwise.
    """
    # 0o111 is the spelling accepted by both Python 2.6+ and Python 3;
    # the legacy form 0111 is a syntax error on Python 3.
    return bool(mode & 0o111)
485
"""Determine the Bazaar inventory kind based on Unix file mode."""
486
entry_kind = (mode & 0700000) / 0100000
489
elif entry_kind == 1:
490
file_kind = (mode & 070000) / 010000
496
return 'tree-reference'
498
raise AssertionError(
499
"Unknown file kind %d, perms=%o." % (file_kind, mode,))
501
raise AssertionError(
502
"Unknown kind, perms=%r." % (mode,))
505
def object_mode(kind, executable):
506
if kind == 'directory':
508
elif kind == 'symlink':
514
mode = stat.S_IFREG | 0644
518
elif kind == 'tree-reference':
519
from dulwich.objects import S_IFGITLINK
525
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Inventory entry; its ``kind`` and ``executable``
        attributes are read.
    :return: Whatever ``object_mode`` returns for that kind and
        executable flag.
    """
    return object_mode(entry.kind, entry.executable)
530
def directory_to_tree(entry, lookup_ie_sha1, unusual_modes, empty_file_name):
531
"""Create a Git Tree object from a Bazaar directory.
533
:param entry: Inventory entry
534
:param lookup_ie_sha1: Lookup the Git SHA1 for a inventory entry
535
:param unusual_modes: Dictionary with unusual file modes by file ids
536
:param empty_file_name: Name to use for dummy files in empty directories,
537
None to ignore empty directories.
166
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
167
"""Convert a Bazaar tree to a Git tree.
169
:return: Yields tuples with object sha1, object and path
539
from dulwich.objects import Blob, Tree
171
from dulwich.objects import Tree
172
from bzrlib.inventory import InventoryDirectory, InventoryFile
541
for name, value in entry.children.iteritems():
542
ie = entry.children[name]
544
mode = unusual_modes[ie.file_id]
546
mode = entry_mode(ie)
547
hexsha = lookup_ie_sha1(ie)
548
if hexsha is not None:
549
tree.add(mode, name.encode("utf-8"), hexsha)
550
if entry.parent_id is not None and len(tree) == 0:
551
# Only the root can be an empty tree
552
if empty_file_name is not None:
553
tree.add(stat.S_IFREG | 0644, empty_file_name,
179
# stack contains the set of trees that we haven't
180
# finished constructing
181
for path, entry in inventory.iter_entries():
182
while stack and not path.startswith(osutils.pathjoin(cur, "")):
183
# We've hit a file that's not a child of the previous path
187
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
188
cur, tree = stack.pop()
191
if entry.kind == "directory":
192
stack.append((cur, tree))
195
elif entry.kind == "file":
196
#FIXME: We could potentially make this lazy to avoid SHA-ing lots of stuff
197
# and having all these objects in memory at once
198
text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
199
blob = text_to_blob(text)
201
yield sha, blob, path
203
name = urlutils.basename(path).encode("utf-8")
204
mode = stat.S_IFREG | 0644
207
tree.add(mode, name, sha)
208
elif entry.kind == "symlink":
209
blob = symlink_to_blob(entry)
211
yield sha, blob, path
212
name = urlutils.basename(path).encode("utf-8")
213
tree.add(stat.S_IFLNK, name, sha)
560
def extract_unusual_modes(rev):
562
foreign_revid, mapping = mapping_registry.parse_revision_id(
564
except errors.InvalidRevisionId:
215
raise AssertionError("Unknown kind %s" % entry.kind)
217
while len(stack) > 1:
221
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
222
cur, tree = stack.pop()
226
yield tree.id, tree, cur
229
def revision_to_commit(rev, tree_sha, parent_lookup):
230
"""Turn a Bazaar revision in to a Git commit
232
:param tree_sha: Tree sha for the commit
233
:param parent_lookup: Function for looking up the GIT sha equiv of a bzr revision
234
:return dulwich.objects.Commit represent the revision:
236
from dulwich.objects import Commit
238
commit._tree = tree_sha
239
for p in rev.parent_ids:
240
git_p = parent_lookup(p)
241
if git_p is not None:
242
assert len(git_p) == 40, "unexpected length for %r" % git_p
243
commit._parents.append(git_p)
244
commit._message = rev.message.encode("utf-8")
245
commit._committer = rev.committer.encode("utf-8")
246
commit._author = rev.get_apparent_authors()[0].encode("utf-8")
247
commit._commit_time = long(rev.timestamp)
248
if 'author-timestamp' in rev.properties:
249
commit._author_time = long(rev.properties['author-timestamp'])
567
return mapping.export_unusual_file_modes(rev)
570
def parse_git_svn_id(text):
    """Parse the value of a ``git-svn-id`` line.

    The text is split at its last space into a head and a UUID, and the
    head is split at its last ``@`` into a URL and a revision number,
    i.e. the expected layout is ``<url>@<rev> <uuid>``.

    :param text: The git-svn-id value (without the ``git-svn-id: ``
        prefix).
    :return: Tuple of (full_url, rev, uuid) with ``rev`` as an int.
    :raises ValueError: If the space or ``@`` separator is missing, or
        the revision is not an integer.
    """
    (head, uuid) = text.rsplit(" ", 1)
    (full_url, rev) = head.rsplit("@", 1)
    return (full_url, int(rev), uuid)
576
class GitFileIdMap(object):
578
def __init__(self, file_ids, mapping):
579
self.file_ids = file_ids
581
self.mapping = mapping
583
def lookup_file_id(self, path):
584
assert type(path) is str
586
file_id = self.file_ids[path]
588
file_id = self.mapping.generate_file_id(path)
589
assert type(file_id) is str
592
def lookup_path(self, file_id):
593
if self.paths is None:
595
for k, v in self.file_ids.iteritems():
598
path = self.paths[file_id]
600
return self.mapping.parse_file_id(file_id)
602
assert type(path) is str
251
commit._author_time = commit._commit_time