/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Add more docstrings, support storing unusual file modes.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    bencode,
 
25
    errors,
 
26
    foreign,
 
27
    osutils,
 
28
    trace,
 
29
    urlutils,
 
30
    )
 
31
from bzrlib.inventory import (
 
32
    ROOT_ID,
 
33
    )
21
34
from bzrlib.foreign import (
22
 
        ForeignRevision,
23
 
        )
 
35
    ForeignVcs, 
 
36
    VcsMappingRegistry, 
 
37
    ForeignRevision,
 
38
    )
 
39
 
 
40
# Regular-file type bit combined with 0644 (rw-r--r--) permission bits.
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
24
41
 
25
42
 
26
43
def escape_file_id(file_id):
28
45
 
29
46
 
30
47
def unescape_file_id(file_id):
    """Decode a file id that uses "_" as its escape character.

    "__" decodes to "_" and "_s" decodes to " "; every other character is
    copied through unchanged.

    :param file_id: Escaped file id
    :return: The unescaped string
    :raises AssertionError: If "_" is followed by an unknown escape
        character, or if the file id ends in the middle of an escape
        sequence.
    """
    ret = []
    length = len(file_id)
    i = 0
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
            i += 1
            continue
        # An escape sequence is always two characters long; a lone trailing
        # "_" used to surface as a bare IndexError from the lookahead.
        if i + 1 >= length:
            raise AssertionError(
                "incomplete escape sequence at end of file id %r" % (file_id,))
        escape = file_id[i+1]
        if escape == '_':
            ret.append("_")
        elif escape == 's':
            ret.append(" ")
        else:
            raise AssertionError("unknown escape character %s" % escape)
        # Skip both the "_" and the escape character.
        i += 2
    return "".join(ret)
 
63
 
 
64
 
 
65
def fix_person_identifier(text):
    """Make sure a person identifier contains a "<...>" part.

    :param text: Committer or author string
    :return: text unchanged if it contains both "<" and ">", otherwise
        "text <text>"
    """
    has_brackets = "<" in text and ">" in text
    if not has_brackets:
        # No bracketed part present: reuse the text itself inside "<>".
        return "%s <%s>" % (text, text)
    return text
 
69
 
 
70
 
 
71
def warn_escaped(commit, num_escaped):
    """Emit a warning that XML-invalid characters were escaped.

    :param commit: Identifier of the commit, interpolated into the warning
    :param num_escaped: Number of characters that were escaped
    """
    message = ("Escaped %d XML-invalid characters in %s. Will be unable "
               "to regenerate the SHA map.")
    trace.warning(message, num_escaped, commit)
 
74
 
 
75
 
 
76
def warn_unusual_mode(commit, path, mode):
    """Emit a warning about a file with an unusual mode.

    :param commit: Identifier of the commit, interpolated into the warning
    :param path: Path of the affected file
    :param mode: File mode; formatted as octal in the warning
    """
    message = ("Unusual file mode %o for %s in %s. Will be unable to "
               "regenerate the SHA map.")
    trace.warning(message, mode, path, commit)
 
79
 
 
80
 
 
81
def squash_revision(target_repo, rev):
    """Remove characters that can't be stored from a revision, if necessary.

    The message, the 'author' property (when present) and the committer are
    passed through escape_invalid_chars, in that order; a warning is emitted
    for each field in which something was escaped.

    :param target_repo: Repository in which the revision will be stored
    :param rev: Revision object, will be modified in-place
    """
    serializer = target_repo._serializer
    # Serializers that do not declare the attribute are treated as squashing.
    if not getattr(serializer, "squashes_xml_invalid_characters", True):
        return
    from bzrlib.xml_serializer import escape_invalid_chars

    def report(count):
        # Only warn when at least one character was actually escaped.
        if count:
            warn_escaped(rev.foreign_revid, count)

    rev.message, escaped = escape_invalid_chars(rev.message)
    report(escaped)
    props = rev.properties
    if 'author' in props:
        props['author'], escaped = escape_invalid_chars(props['author'])
        report(escaped)
    rev.committer, escaped = escape_invalid_chars(rev.committer)
    report(escaped)
32
101
 
33
102
 
34
103
class BzrGitMapping(foreign.VcsMapping):
35
104
    """Class that maps between Git and Bazaar semantics."""
36
105
    experimental = False
37
106
 
38
 
    def revision_id_foreign_to_bzr(self, git_rev_id):
 
107
    def __init__(self):
 
108
        super(BzrGitMapping, self).__init__(foreign_git)
 
109
 
 
110
    def __eq__(self, other):
 
111
        return type(self) == type(other) and self.revid_prefix == other.revid_prefix
 
112
 
 
113
    @classmethod
 
114
    def revision_id_foreign_to_bzr(cls, git_rev_id):
39
115
        """Convert a git revision id handle to a Bazaar revision id."""
40
 
        return "%s:%s" % (self.revid_prefix, git_rev_id)
 
116
        return "%s:%s" % (cls.revid_prefix, git_rev_id)
41
117
 
42
 
    def revision_id_bzr_to_foreign(self, bzr_rev_id):
 
118
    @classmethod
 
119
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
43
120
        """Convert a Bazaar revision id to a git revision id handle."""
44
 
        if not bzr_rev_id.startswith("%s:" % self.revid_prefix):
45
 
            raise errors.InvalidRevisionId(bzr_rev_id, self)
46
 
        return bzr_rev_id[len(self.revid_prefix)+1:]
47
 
 
48
 
    def show_foreign_revid(self, foreign_revid):
49
 
        return { "git commit": foreign_revid }
 
121
        if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
 
122
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
 
123
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
50
124
 
51
125
    def generate_file_id(self, path):
 
126
        # Git paths are just bytestrings
 
127
        # We must just hope they are valid UTF-8..
52
128
        if path == "":
53
129
            return ROOT_ID
54
 
        return escape_file_id(path.encode('utf-8'))
 
130
        return escape_file_id(path)
 
131
 
 
132
    def parse_file_id(self, file_id):
 
133
        if file_id == ROOT_ID:
 
134
            return ""
 
135
        return unescape_file_id(file_id)
 
136
 
 
137
    def import_unusual_file_modes(self, rev, unusual_file_modes):
 
138
        if unusual_file_modes:
 
139
            rev.properties['file-modes'] = bencode.bencode(unusual_file_modes)
55
140
 
56
141
    def import_commit(self, commit):
57
142
        """Convert a git commit to a bzr revision.
66
151
        rev.committer = str(commit.committer).decode("utf-8", "replace")
67
152
        if commit.committer != commit.author:
68
153
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
154
 
 
155
        if commit.commit_time != commit.author_time:
 
156
            rev.properties['author-timestamp'] = str(commit.author_time)
 
157
        if commit.commit_timezone != commit.author_timezone:
 
158
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
69
159
        rev.timestamp = commit.commit_time
70
 
        rev.timezone = 0
 
160
        rev.timezone = commit.commit_timezone
71
161
        return rev
72
162
 
73
163
 
74
 
class BzrGitMappingExperimental(BzrGitMapping):
 
164
class BzrGitMappingv1(BzrGitMapping):
    """Git mapping whose revision ids carry the 'git-v1' prefix."""

    # Prefix placed in front of ":<git sha>" in Bazaar revision ids.
    revid_prefix = 'git-v1'
    experimental = False

    def __str__(self):
        """Return the revision id prefix as the name of this mapping."""
        prefix = self.revid_prefix
        return prefix
 
170
 
 
171
 
 
172
class BzrGitMappingExperimental(BzrGitMappingv1):
    """Variant of the v1 mapping that is flagged as experimental.

    Revision ids produced by this mapping use the 'git-experimental' prefix.
    """

    revid_prefix = 'git-experimental'
    experimental = True
77
175
 
78
176
 
79
 
default_mapping = BzrGitMappingExperimental()
 
177
class GitMappingRegistry(VcsMappingRegistry):
    """Registry with available git mappings."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Parse a Bazaar revision id using the mapping named in its prefix.

        The part of the revision id before the first ":" is used as the key
        to look up the mapping in this registry.

        :param bzr_revid: Bazaar revision id, expected to look like
            "git-<mapping>:<git sha>"
        :return: Whatever the selected mapping's revision_id_bzr_to_foreign
            returns for bzr_revid
        :raises errors.InvalidRevisionId: If the revision id does not start
            with "git-" or contains no ":" separator
        """
        # Without a ":" there is no mapping name to split off; report it as
        # an invalid revision id instead of failing on tuple unpacking.
        if not bzr_revid.startswith("git-") or ":" not in bzr_revid:
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version = bzr_revid.split(":", 1)[0]
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    # Alias: the same parser is exposed under both names.
    parse_revision_id = revision_id_bzr_to_foreign
 
188
 
 
189
 
 
190
# Registry of the known mappings, keyed by their revision id prefix.
# Both entries are registered lazily by module path and class name.
mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy(
    'git-v1', "bzrlib.plugins.git.mapping", "BzrGitMappingv1")
mapping_registry.register_lazy(
    'git-experimental', "bzrlib.plugins.git.mapping",
    "BzrGitMappingExperimental")
 
195
 
 
196
 
 
197
class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    def __init__(self):
        """Initialise the foreign VCS with the module's mapping registry."""
        super(ForeignGit, self).__init__(mapping_registry)

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Describe a foreign revision id for display.

        :param foreign_revid: Foreign (git) revision id
        :return: Dictionary with the single key "git commit" mapped to
            foreign_revid
        """
        description = {"git commit": foreign_revid}
        return description
 
206
 
 
207
 
 
208
# Module-level ForeignGit instance. It has to exist before any mapping is
# instantiated, because BzrGitMapping.__init__ passes it to its base class.
foreign_git = ForeignGit()
# Mapping instance used by default.
default_mapping = BzrGitMappingv1()
 
210
 
 
211
 
 
212
def text_to_blob(texts, entry):
    """Create a Blob holding the fulltext of a file inventory entry.

    :param texts: Object providing get_record_stream, queried with the key
        (entry.file_id, entry.revision)
    :param entry: Inventory entry of the file
    :return: A dulwich Blob whose text is the record's fulltext
    """
    from dulwich.objects import Blob
    key = (entry.file_id, entry.revision)
    stream = texts.get_record_stream([key], 'unordered', True)
    # Exactly one key was requested, so only the first record is read.
    record = stream.next()
    text = record.get_bytes_as('fulltext')
    blob = Blob()
    blob._text = text
    return blob
 
218
 
 
219
 
 
220
def symlink_to_blob(entry):
    """Create a Blob holding the target of a symlink inventory entry.

    :param entry: Inventory entry of the symlink
    :return: A dulwich Blob whose text is entry.symlink_target
    """
    from dulwich.objects import Blob
    target = entry.symlink_target
    blob = Blob()
    blob._text = target
    return blob
 
225
 
 
226
 
 
227
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Unix file mode
    :return: True if any of the user, group or other execute bits is set
    """
    # Equivalent to the octal mask 0111.
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (mode & exec_bits) != 0
 
230
 
 
231
 
 
232
def mode_kind(mode):
    """Determine the Bazaar inventory kind based on Unix file mode.

    :param mode: Unix file mode
    :return: One of 'directory', 'file', 'symlink' or 'tree-reference'
    :raises AssertionError: If the type bits of the mode are not recognised
    """
    # Octal digit 6 of the mode (mask 0700000): 0 is treated as a directory,
    # 1 means the next digit selects the kind of file.
    entry_kind = (mode >> 15) & 7
    if entry_kind == 0:
        return 'directory'
    if entry_kind != 1:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))
    # Octal digit 5 of the mode (mask 070000).
    file_kind = (mode >> 12) & 7
    kinds = {0: 'file', 2: 'symlink', 6: 'tree-reference'}
    if file_kind not in kinds:
        raise AssertionError(
            "Unknown file kind %d, perms=%o." % (file_kind, mode,))
    return kinds[file_kind]
 
251
 
 
252
 
 
253
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Inventory entry; its kind and, for files, its executable
        flag are consulted
    :return: stat.S_IFDIR for directories, stat.S_IFLNK for symlinks, and
        for files stat.S_IFREG with 0644 permissions (0755 if executable)
    :raises AssertionError: If the entry has any other kind
    """
    kind = entry.kind
    if kind == 'directory':
        return stat.S_IFDIR
    if kind == 'symlink':
        return stat.S_IFLNK
    if kind == 'file':
        # rw-r--r--, i.e. octal 0644.
        mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IROTH)
        if entry.executable:
            # Add the execute bits for user, group and other (octal 0111).
            mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        return mode
    # Name the offending kind so the failure can be diagnosed.
    raise AssertionError("Unknown kind %s" % (kind,))
 
266
 
 
267
 
 
268
def directory_to_tree(entry, lookup_ie_sha1):
    """Build a git Tree object from a directory inventory entry.

    Children are added in sorted name order.

    :param entry: Directory inventory entry; its children mapping is read
    :param lookup_ie_sha1: Callable returning the sha for a child entry
    :return: The serialized dulwich Tree
    """
    from dulwich.objects import Tree
    tree = Tree()
    children = entry.children
    for name in sorted(children.keys()):
        child = children[name]
        tree.add(entry_mode(child), name.encode("utf-8"),
                 lookup_ie_sha1(child))
    tree.serialize()
    return tree
 
276
 
 
277
 
 
278
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
    """Convert a Bazaar tree to a Git tree.

    Walks inventory.iter_entries(), creating a Blob for every file and
    symlink and a Tree for every directory. A tree is yielded once an entry
    outside of its directory is encountered, or at the end of the walk.

    :param inventory: Inventory to convert
    :param texts: Texts store handed to text_to_blob for file contents
    :param mapping: Not used by this function
    :param cur: Path of the directory currently being built; defaults to ""
    :return: Yields tuples with object sha1, object and path
    :raises AssertionError: If an entry is neither directory, file nor
        symlink
    """
    from dulwich.objects import Tree
    import stat
    if cur is None:
        cur = ""
    # stack contains the set of trees that we haven't
    # finished constructing
    stack = []
    tree = Tree()

    for path, entry in inventory.iter_entries():
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
            # We've hit a file that's not a child of the previous path:
            # finish the current tree and attach it to its parent.
            tree.serialize()
            finished_sha = tree.id
            yield finished_sha, tree, cur.encode("utf-8")
            dir_entry = (stat.S_IFDIR,
                         urlutils.basename(cur).encode('UTF-8'),
                         finished_sha)
            cur, tree = stack.pop()
            tree.add(*dir_entry)

        kind = entry.kind
        if kind == "directory":
            # Descend: remember the tree being built and start a new one.
            stack.append((cur, tree))
            cur = path
            tree = Tree()
            continue

        if kind == "file":
            blob = text_to_blob(texts, entry)
        elif kind == "symlink":
            blob = symlink_to_blob(entry)
        else:
            raise AssertionError("Unknown kind %s" % entry.kind)
        blob_sha = blob.id
        yield blob_sha, blob, path.encode("utf-8")
        name = urlutils.basename(path).encode("utf-8")
        tree.add(entry_mode(entry), name, blob_sha)

    # Unwind the remaining directories, leaving the bottom stack entry alone.
    while len(stack) > 1:
        tree.serialize()
        finished_sha = tree.id
        yield finished_sha, tree, cur.encode("utf-8")
        dir_entry = (stat.S_IFDIR,
                     urlutils.basename(cur).encode('UTF-8'),
                     finished_sha)
        cur, tree = stack.pop()
        tree.add(*dir_entry)

    tree.serialize()
    yield tree.id, tree, cur.encode("utf-8")
 
328
 
 
329
 
 
330
def revision_to_commit(rev, tree_sha, parent_lookup):
    """Turn a Bazaar revision into a Git commit.

    :param rev: Bazaar revision to convert
    :param tree_sha: Tree sha for the commit
    :param parent_lookup: Function for looking up the git sha equivalent of
        a bzr revision id; parents for which it returns None are left out
    :return: dulwich.objects.Commit representing the revision
    """
    from dulwich.objects import Commit
    commit = Commit()
    commit.tree = tree_sha
    for parent_id in rev.parent_ids:
        parent_sha = parent_lookup(parent_id)
        if parent_sha is None:
            continue
        assert len(parent_sha) == 40, "unexpected length for %r" % parent_sha
        commit.parents.append(parent_sha)
    commit.message = rev.message.encode("utf-8")
    committer = rev.committer.encode("utf-8")
    commit.committer = fix_person_identifier(committer)
    author = rev.get_apparent_authors()[0].encode("utf-8")
    commit.author = fix_person_identifier(author)
    commit.commit_time = long(rev.timestamp)
    props = rev.properties
    # Author time and timezone fall back to the commit's own values when
    # the revision carries no explicit author properties.
    if 'author-timestamp' not in props:
        commit.author_time = commit.commit_time
    else:
        commit.author_time = long(props['author-timestamp'])
    commit.commit_timezone = rev.timezone
    if 'author-timezone' not in props:
        commit.author_timezone = commit.commit_timezone
    else:
        commit.author_timezone = int(props['author-timezone'])
    return commit