/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Reduce number of round trips when fetching from Git.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    urlutils,
 
28
    )
 
29
from bzrlib.inventory import (
 
30
    ROOT_ID,
 
31
    )
21
32
from bzrlib.foreign import (
22
 
        ForeignRevision,
23
 
        )
 
33
    ForeignVcs, 
 
34
    VcsMappingRegistry, 
 
35
    ForeignRevision,
 
36
    )
 
37
from bzrlib.xml_serializer import (
 
38
    escape_invalid_chars,
 
39
    )
 
40
 
 
41
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
24
42
 
25
43
 
26
44
def escape_file_id(file_id):
28
46
 
29
47
 
30
48
def unescape_file_id(file_id):
    """Reverse the escaping applied to a path to form a file id.

    Two escape sequences are recognised: "__" decodes to "_" and "_s"
    decodes to " ".  Every other character is copied through unchanged.
    The string is scanned left to right, so "__s" decodes to "_s" (an
    escaped underscore followed by a literal "s") rather than "_ ".

    :param file_id: Escaped file id
    :return: The unescaped string
    :raises AssertionError: if an underscore is followed by an unknown
        escape character, or is the last character of file_id
    """
    ret = []
    i = 0
    length = len(file_id)
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
        else:
            # An underscore always introduces a two character sequence; a
            # lone trailing one used to fall over with a bare IndexError.
            if i + 1 >= length:
                raise AssertionError(
                    "truncated escape sequence at end of %r" % (file_id,))
            escaped = file_id[i+1]
            if escaped == '_':
                ret.append("_")
            elif escaped == 's':
                ret.append(" ")
            else:
                raise AssertionError("unknown escape character %s" % escaped)
            # Skip the second character of the escape sequence
            i += 1
        i += 1
    return "".join(ret)
 
64
 
 
65
 
 
66
def fix_person_identifier(text):
    """Make sure a person identifier has the "name <address>" shape.

    Text that already contains both a "<" and a ">" is handed back
    untouched; anything else is used as both the name and the address.

    :param text: Person identifier
    :return: text itself, or "text <text>"
    """
    already_bracketed = "<" in text and ">" in text
    if not already_bracketed:
        return "%s <%s>" % (text, text)
    return text
32
70
 
33
71
 
34
72
class BzrGitMapping(foreign.VcsMapping):
35
73
    """Class that maps between Git and Bazaar semantics."""
36
74
    experimental = False
37
75
 
38
 
    def revision_id_foreign_to_bzr(self, git_rev_id):
 
76
    def __init__(self):
 
77
        super(BzrGitMapping, self).__init__(foreign_git)
 
78
 
 
79
    def __eq__(self, other):
 
80
        return type(self) == type(other) and self.revid_prefix == other.revid_prefix
 
81
 
 
82
    @classmethod
 
83
    def revision_id_foreign_to_bzr(cls, git_rev_id):
39
84
        """Convert a git revision id handle to a Bazaar revision id."""
40
 
        return "%s:%s" % (self.revid_prefix, git_rev_id)
 
85
        return "%s:%s" % (cls.revid_prefix, git_rev_id)
41
86
 
42
 
    def revision_id_bzr_to_foreign(self, bzr_rev_id):
 
87
    @classmethod
 
88
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
43
89
        """Convert a Bazaar revision id to a git revision id handle."""
44
 
        if not bzr_rev_id.startswith("%s:" % self.revid_prefix):
45
 
            raise errors.InvalidRevisionId(bzr_rev_id, self)
46
 
        return bzr_rev_id[len(self.revid_prefix)+1:]
47
 
 
48
 
    def show_foreign_revid(self, foreign_revid):
49
 
        return { "git commit": foreign_revid }
 
90
        if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
 
91
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
 
92
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
50
93
 
51
94
    def generate_file_id(self, path):
 
95
        # Git paths are just bytestrings
 
96
        # We must just hope they are valid UTF-8..
52
97
        if path == "":
53
98
            return ROOT_ID
54
 
        return escape_file_id(path.encode('utf-8'))
 
99
        return escape_file_id(path)
 
100
 
 
101
    def parse_file_id(self, file_id):
 
102
        if file_id == ROOT_ID:
 
103
            return ""
 
104
        return unescape_file_id(file_id)
55
105
 
56
106
    def import_commit(self, commit):
57
107
        """Convert a git commit to a bzr revision.
62
112
            raise AssertionError("Commit object can't be None")
63
113
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
64
114
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
65
 
        rev.message = commit.message.decode("utf-8", "replace")
66
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
115
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
116
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
67
117
        if commit.committer != commit.author:
68
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
118
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
119
 
 
120
        if commit.commit_time != commit.author_time:
 
121
            rev.properties['author-timestamp'] = str(commit.author_time)
 
122
        if commit.commit_timezone != commit.author_timezone:
 
123
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
69
124
        rev.timestamp = commit.commit_time
70
 
        rev.timezone = 0
 
125
        rev.timezone = commit.commit_timezone
71
126
        return rev
72
127
 
73
128
 
74
 
class BzrGitMappingExperimental(BzrGitMapping):
 
129
class BzrGitMappingv1(BzrGitMapping):
    """Git mapping whose revision ids carry the 'git-v1' prefix."""

    experimental = False
    revid_prefix = 'git-v1'

    def __str__(self):
        # The revision id prefix doubles as the printable name.
        return self.revid_prefix
 
135
 
 
136
 
 
137
class BzrGitMappingExperimental(BzrGitMappingv1):
    """Variant of the v1 mapping that is flagged as experimental.

    Revision ids produced by it use the 'git-experimental' prefix.
    """

    experimental = True
    revid_prefix = 'git-experimental'
77
140
 
78
141
 
79
 
default_mapping = BzrGitMappingExperimental()
 
142
class GitMappingRegistry(VcsMappingRegistry):
    """Registry used to find the Git mapping a Bazaar revision id belongs to."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Parse a Bazaar revision id with the mapping named in its prefix.

        The text before the first ":" is looked up in this registry and the
        actual parsing is delegated to the mapping found there.  A lookup
        failure in self.get() is not handled here and propagates.

        :param bzr_revid: Bazaar revision id, e.g. "git-v1:<sha>"
        :return: Whatever the selected mapping's revision_id_bzr_to_foreign
            returns
        :raises errors.InvalidRevisionId: if bzr_revid does not start with
            "git-" or has no ":" separating the mapping name from the rest
        """
        # Without the ":" check, an id such as "git-foo" made the tuple
        # unpacking of split() fail with ValueError instead of the
        # InvalidRevisionId callers are told to expect.
        if not bzr_revid.startswith("git-") or ":" not in bzr_revid:
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version = bzr_revid.split(":", 1)[0]
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    # Same operation under a second name.
    parse_revision_id = revision_id_bzr_to_foreign
 
152
 
 
153
 
 
154
# Registry of the known mappings.  Each one is registered by module path and
# class name (as strings) under the prefix used in its revision ids.
mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy(
    'git-v1', "bzrlib.plugins.git.mapping", "BzrGitMappingv1")
mapping_registry.register_lazy(
    'git-experimental', "bzrlib.plugins.git.mapping",
    "BzrGitMappingExperimental")
 
159
 
 
160
 
 
161
class ForeignGit(ForeignVcs):
    """Foreign VCS description for Git, "the stupid content tracker"."""

    def __init__(self):
        # Hand the module-level mapping registry to the base class.
        super(ForeignGit, self).__init__(mapping_registry)

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Describe a Git revision id for display.

        :param foreign_revid: Git revision id
        :return: Dictionary with the single key "git commit" mapped to
            foreign_revid
        """
        description = {}
        description["git commit"] = foreign_revid
        return description
 
170
 
 
171
 
 
172
# Module-level singletons.  foreign_git has to exist before any mapping is
# instantiated, because BzrGitMapping.__init__ passes it to its base class.
foreign_git = ForeignGit()
default_mapping = BzrGitMappingv1()
 
174
 
 
175
 
 
176
def text_to_blob(texts, entry):
    """Create a Git blob holding the full text of a file entry.

    :param texts: Object providing get_record_stream() (presumably a
        repository's texts store - confirm against the callers)
    :param entry: Entry whose file_id and revision form the key of the
        text to fetch
    :return: dulwich.objects.Blob whose _text is the fulltext of the entry
    """
    from dulwich.objects import Blob
    key = (entry.file_id, entry.revision)
    # Only one key is requested, so the first record is the one we want.
    record = texts.get_record_stream([key], 'unordered', True).next()
    fulltext = record.get_bytes_as('fulltext')
    file_blob = Blob()
    file_blob._text = fulltext
    return file_blob
 
182
 
 
183
 
 
184
def symlink_to_blob(entry):
    """Create a Git blob whose content is the target of a symlink entry.

    :param entry: Entry with a symlink_target attribute
    :return: dulwich.objects.Blob whose _text is entry.symlink_target
    """
    from dulwich.objects import Blob
    link_blob = Blob()
    link_blob._text = entry.symlink_target
    return link_blob
 
189
 
 
190
 
 
191
def entry_mode(entry):
 
192
    if entry.kind == 'directory':
 
193
        return stat.S_IFDIR
 
194
    elif entry.kind == 'symlink':
 
195
        return stat.S_IFLNK
 
196
    elif entry.kind == 'file':
 
197
        mode = stat.S_IFREG | 0644
 
198
        if entry.executable:
 
199
            mode |= 0111
 
200
        return mode
 
201
    else:
 
202
        raise AssertionError
 
203
 
 
204
 
 
205
def directory_to_tree(entry, lookup_ie_sha1):
    """Build a Git tree object from the children of a directory entry.

    :param entry: Directory entry; its children attribute maps names to
        child entries
    :param lookup_ie_sha1: Callable returning the sha to record for a child
        entry
    :return: Serialized dulwich.objects.Tree with one item per child, added
        in sorted name order
    """
    from dulwich.objects import Tree
    children = entry.children
    result = Tree()
    for child_name in sorted(children.keys()):
        child = children[child_name]
        child_mode = entry_mode(child)
        result.add(child_mode, child_name.encode("utf-8"),
                   lookup_ie_sha1(child))
    result.serialize()
    return result
 
213
 
 
214
 
 
215
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
    """Convert a Bazaar tree to a Git tree.

    This is a generator.  Blobs are yielded as their entries are met and
    each tree is yielded once all of its children have been seen; the
    outermost tree comes last.

    :param inventory: Inventory to walk with iter_entries()
    :param texts: Passed to text_to_blob() to fetch file contents
    :param mapping: Not used by this function
    :param cur: Path the outermost tree is reported under; None means ""
    :return: Yields tuples with object sha1, object and path
    """
    from dulwich.objects import Tree
    import stat
    if cur is None:
        cur = ""
    # unfinished holds the (path, tree) pairs of the enclosing trees that
    # are still waiting for more children
    unfinished = []
    tree = Tree()

    for path, entry in inventory.iter_entries():
        while unfinished and not path.startswith(osutils.pathjoin(cur, "")):
            # path lies outside the directory being built: finish that
            # tree and record it in its parent
            tree.serialize()
            tree_sha = tree.id
            yield tree_sha, tree, cur.encode("utf-8")
            item = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'),
                    tree_sha)
            cur, tree = unfinished.pop()
            tree.add(*item)

        if entry.kind == "directory":
            # Descend: park the current tree and start a fresh one
            unfinished.append((cur, tree))
            cur = path
            tree = Tree()
            continue

        if entry.kind == "file":
            blob = text_to_blob(texts, entry)
        elif entry.kind == "symlink":
            blob = symlink_to_blob(entry)
        else:
            raise AssertionError("Unknown kind %s" % entry.kind)
        blob_sha = blob.id
        yield blob_sha, blob, path.encode("utf-8")
        name = urlutils.basename(path).encode("utf-8")
        tree.add(entry_mode(entry), name, blob_sha)

    # Unwind the directories that are still open, stopping while one pair
    # is left on the stack
    while len(unfinished) > 1:
        tree.serialize()
        tree_sha = tree.id
        yield tree_sha, tree, cur.encode("utf-8")
        item = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'),
                tree_sha)
        cur, tree = unfinished.pop()
        tree.add(*item)

    tree.serialize()
    yield tree.id, tree, cur.encode("utf-8")
 
265
 
 
266
 
 
267
def revision_to_commit(rev, tree_sha, parent_lookup):
    """Turn a Bazaar revision into a Git commit.

    :param rev: Bazaar revision; its parent_ids, message, committer,
        timestamp, timezone, properties and apparent authors are used
    :param tree_sha: Tree sha for the commit
    :param parent_lookup: Function for looking up the Git sha equivalent of
        a bzr revision id; parents for which it returns None are left out
    :return: dulwich.objects.Commit representing the revision
    :raises AssertionError: if parent_lookup returns a sha that is not 40
        characters long
    """
    from dulwich.objects import Commit
    commit = Commit()
    commit.tree = tree_sha
    for p in rev.parent_ids:
        git_p = parent_lookup(p)
        if git_p is not None:
            # Raised explicitly (as elsewhere in this module) so the check
            # is not stripped when Python runs with -O.
            if len(git_p) != 40:
                raise AssertionError("unexpected length for %r" % (git_p,))
            commit.parents.append(git_p)
    commit.message = rev.message.encode("utf-8")
    commit.committer = fix_person_identifier(rev.committer.encode("utf-8"))
    # Only the first apparent author is recorded
    commit.author = fix_person_identifier(rev.get_apparent_authors()[0].encode("utf-8"))
    commit.commit_time = long(rev.timestamp)
    # The author time and timezone fall back to the commit's own values
    # when the revision carries no 'author-*' properties.
    if 'author-timestamp' in rev.properties:
        commit.author_time = long(rev.properties['author-timestamp'])
    else:
        commit.author_time = commit.commit_time
    commit.commit_timezone = rev.timezone
    if 'author-timezone' in rev.properties:
        commit.author_timezone = int(rev.properties['author-timezone'])
    else:
        commit.author_timezone = commit.commit_timezone
    return commit