1
# Copyright (C) 2007-2008 Canonical Ltd
1
# Copyright (C) 2007 Canonical Ltd
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
3
# Copyright (C) 2008 John Carr
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
17
19
"""Converters, etc for going between Bazaar and Git ids."""
19
from bzrlib import errors, foreign, urlutils
20
from bzrlib.inventory import ROOT_ID
27
from bzrlib.inventory import (
21
30
from bzrlib.foreign import (
35
from bzrlib.xml_serializer import (
39
# Default file modes used for directories, regular files and symlinks.
# Written with the ``0o`` octal prefix: the legacy leading-zero form
# (``0040000``) is a syntax error on Python 3, whereas ``0o`` literals are
# accepted by Python 2.6+ and Python 3 alike.  The values are unchanged.
DEFAULT_TREE_MODE = 0o040000     # directory bit, no permission bits
DEFAULT_FILE_MODE = 0o100644     # regular file, rw-r--r--
DEFAULT_SYMLINK_MODE = 0o120000  # symbolic link, no permission bits
27
44
def escape_file_id(file_id):
    """Return ``file_id`` with underscores and spaces escaped.

    Every ``_`` is doubled to ``__`` first; afterwards every space is
    replaced by ``_s``.
    """
    underscores_doubled = file_id.replace('_', '__')
    return underscores_doubled.replace(' ', '_s')
55
72
return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
57
74
def generate_file_id(self, path):
75
# Git paths are just bytestrings
76
# We must just hope they are valid UTF-8..
77
assert isinstance(path, str)
60
return escape_file_id(path.encode('utf-8'))
80
return escape_file_id(path)
82
def parse_file_id(self, file_id):
83
if file_id == ROOT_ID:
85
return unescape_file_id(file_id)
62
87
def import_commit(self, commit):
63
88
"""Convert a git commit to a bzr revision.
68
93
raise AssertionError("Commit object can't be None")
69
94
rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
95
rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
rev.message = commit.message.decode("utf-8", "replace")
72
rev.committer = str(commit.committer).decode("utf-8", "replace")
96
rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
97
rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
98
if commit.committer != commit.author:
74
rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
99
rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
101
if commit.commit_time != commit.author_time:
102
rev.properties['author-timestamp'] = str(commit.author_time)
75
103
rev.timestamp = commit.commit_time
121
149
# Module-level BzrGitMappingv1 instance, bound to the name default_mapping.
default_mapping = BzrGitMappingv1()
124
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
from dulwich.objects import Tree, Blob
152
def text_to_blob(text):
153
from dulwich.objects import Blob
159
def symlink_to_blob(entry):
160
from dulwich.objects import Blob
162
blob._text = entry.symlink_target
166
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
167
"""Convert a Bazaar tree to a Git tree.
169
:return: Yields tuples with object sha1, object and path
171
from dulwich.objects import Tree
126
172
from bzrlib.inventory import InventoryDirectory, InventoryFile
132
inv = repo.get_inventory(revision_id)
134
179
# stack contains the set of trees that we haven't
135
180
# finished constructing
137
for path, entry in inv.iter_entries():
138
while stack and not path.startswith(cur):
181
for path, entry in inventory.iter_entries():
182
while stack and not path.startswith(osutils.pathjoin(cur, "")):
183
# We've hit a file that's not a child of the previous path
140
sha = tree.sha().hexdigest()
141
186
yield sha, tree, cur
142
187
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
188
cur, tree = stack.pop()
146
if type(entry) == InventoryDirectory:
191
if entry.kind == "directory":
147
192
stack.append((cur, tree))
151
if type(entry) == InventoryFile:
195
elif entry.kind == "file":
152
196
#FIXME: We could potentially make this lazy to avoid SHA-ing lots of stuff
153
197
# and having all these objects in memory at once
155
_, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
sha = blob.sha().hexdigest()
198
text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
199
blob = text_to_blob(text)
157
201
yield sha, blob, path
159
203
name = urlutils.basename(path).encode("utf-8")
161
205
if entry.executable:
163
207
tree.add(mode, name, sha)
208
elif entry.kind == "symlink":
209
blob = symlink_to_blob(entry)
211
yield sha, blob, path
212
name = urlutils.basename(path).encode("utf-8")
213
tree.add(stat.S_IFLNK, name, sha)
215
raise AssertionError("Unknown kind %s" % entry.kind)
165
217
while len(stack) > 1:
167
sha = tree.sha().hexdigest()
168
220
yield sha, tree, cur
169
221
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
222
cur, tree = stack.pop()
174
yield tree.sha().hexdigest(), tree, cur
226
yield tree.id, tree, cur
177
229
def revision_to_commit(rev, tree_sha, parent_lookup):
187
239
for p in rev.parent_ids:
188
240
git_p = parent_lookup(p)
189
241
if git_p is not None:
242
assert len(git_p) == 40, "unexpected length for %r" % git_p
190
243
commit._parents.append(git_p)
191
244
commit._message = rev.message.encode("utf-8")
192
245
commit._committer = rev.committer.encode("utf-8")
193
commit._author = rev.get_apparent_author().encode("utf-8")
246
commit._author = rev.get_apparent_authors()[0].encode("utf-8")
194
247
commit._commit_time = long(rev.timestamp)
248
if 'author-timestamp' in rev.properties:
249
commit._author_time = long(rev.properties['author-timestamp'])
251
commit._author_time = commit._commit_time
195
252
commit.serialize()