1
# Copyright (C) 2007-2008 Canonical Ltd
1
# Copyright (C) 2007 Canonical Ltd
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
3
# Copyright (C) 2008 John Carr
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
17
19
"""Converters, etc for going between Bazaar and Git ids."""
19
from bzrlib import errors, foreign, urlutils
20
from bzrlib.inventory import ROOT_ID
26
from bzrlib.inventory import (
21
29
from bzrlib.foreign import (
34
from bzrlib.xml_serializer import (
38
DEFAULT_TREE_MODE = 0040000
39
DEFAULT_FILE_MODE = 0100644
40
DEFAULT_SYMLINK_MODE = 0120000
27
43
def escape_file_id(file_id):
28
44
return file_id.replace('_', '__').replace(' ', '_s')
55
71
return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
57
73
def generate_file_id(self, path):
74
# Git paths are just bytestrings
75
# We must just hope they are valid UTF-8..
76
assert isinstance(path, str)
60
return escape_file_id(path.encode('utf-8'))
79
return escape_file_id(path)
81
def parse_file_id(self, file_id):
82
if file_id == ROOT_ID:
84
return unescape_file_id(file_id)
62
86
def import_commit(self, commit):
63
87
"""Convert a git commit to a bzr revision.
68
92
raise AssertionError("Commit object can't be None")
69
93
rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
94
rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
rev.message = commit.message.decode("utf-8", "replace")
72
rev.committer = str(commit.committer).decode("utf-8", "replace")
95
rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
96
rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
97
if commit.committer != commit.author:
74
rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
98
rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
100
if commit.commit_time != commit.author_time:
101
rev.properties['author-timestamp'] = str(commit.author_time)
75
102
rev.timestamp = commit.commit_time
121
148
default_mapping = BzrGitMappingv1()
124
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
from dulwich.objects import Tree, Blob
151
def text_to_blob(text):
152
from dulwich.objects import Blob
158
def symlink_to_blob(entry):
159
from dulwich.objects import Blob
161
blob._text = entry.symlink_target
165
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
166
"""Convert a Bazaar tree to a Git tree.
168
:return: Yields tuples with object sha1, object and path
170
from dulwich.objects import Tree
126
171
from bzrlib.inventory import InventoryDirectory, InventoryFile
132
inv = repo.get_inventory(revision_id)
134
178
# stack contains the set of trees that we haven't
135
179
# finished constructing
137
for path, entry in inv.iter_entries():
180
for path, entry in inventory.iter_entries():
138
181
while stack and not path.startswith(cur):
140
sha = tree.sha().hexdigest()
141
184
yield sha, tree, cur
142
185
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
186
cur, tree = stack.pop()
146
if type(entry) == InventoryDirectory:
189
if entry.kind == "directory":
147
190
stack.append((cur, tree))
151
if type(entry) == InventoryFile:
193
elif entry.kind == "file":
152
194
#FIXME: We can make potentially make this Lazy to avoid shaing lots of stuff
153
195
# and having all these objects in memory at once
155
_, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
sha = blob.sha().hexdigest()
196
text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
197
blob = text_to_blob(text)
157
199
yield sha, blob, path
159
201
name = urlutils.basename(path).encode("utf-8")
161
203
if entry.executable:
163
205
tree.add(mode, name, sha)
206
elif entry.kind == "symlink":
207
blob = symlink_to_blob(entry)
209
yield sha, blob, path
210
name = urlutils.basename(path).encode("utf-8")
211
tree.add(stat.S_IFLNK, name, sha)
213
raise AssertionError("Unknown kind %s" % entry.kind)
165
215
while len(stack) > 1:
167
sha = tree.sha().hexdigest()
168
218
yield sha, tree, cur
169
219
t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
220
cur, tree = stack.pop()
174
yield tree.sha().hexdigest(), tree, cur
224
yield tree.id, tree, cur
177
227
def revision_to_commit(rev, tree_sha, parent_lookup):
187
237
for p in rev.parent_ids:
188
238
git_p = parent_lookup(p)
189
239
if git_p is not None:
240
assert len(git_p) == 40, "unexpected length for %r" % git_p
190
241
commit._parents.append(git_p)
191
242
commit._message = rev.message.encode("utf-8")
192
243
commit._committer = rev.committer.encode("utf-8")
193
commit._author = rev.get_apparent_author().encode("utf-8")
244
commit._author = rev.get_apparent_authors()[0].encode("utf-8")
194
245
commit._commit_time = long(rev.timestamp)
246
if 'author-timestamp' in rev.properties:
247
commit._author_time = long(rev.properties['author-timestamp'])
249
commit._author_time = commit._commit_time
195
250
commit.serialize()