/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Extract unusual file modes from revision when reconstructing Trees.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    bencode,
 
25
    errors,
 
26
    foreign,
 
27
    osutils,
 
28
    trace,
 
29
    urlutils,
 
30
    )
 
31
from bzrlib.inventory import (
 
32
    ROOT_ID,
 
33
    )
21
34
from bzrlib.foreign import (
22
 
        ForeignRevision,
23
 
        )
 
35
    ForeignVcs, 
 
36
    VcsMappingRegistry, 
 
37
    ForeignRevision,
 
38
    )
 
39
 
 
40
# Regular file with rw-r--r-- permissions (S_IFREG | 0644), spelled with stat
# constants so the value does not depend on legacy bare-octal literal syntax.
DEFAULT_FILE_MODE = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                     stat.S_IRGRP | stat.S_IROTH)
24
41
 
25
42
 
26
43
def escape_file_id(file_id):
28
45
 
29
46
 
30
47
def unescape_file_id(file_id):
    """Decode an escaped file id back into the original path string.

    Two escape sequences are recognised: "__" decodes to "_" and "_s"
    decodes to " ". Every other character is copied through unchanged.

    :param file_id: Escaped file id
    :return: The unescaped string
    :raises AssertionError: If an underscore is followed by an unknown
        character, or is the last character of file_id
    """
    ret = []
    i = 0
    length = len(file_id)
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
            i += 1
            continue
        # An underscore always introduces a two-character escape sequence.
        if i + 1 >= length:
            # Previously this fell through to an uninformative IndexError.
            raise AssertionError(
                "unterminated escape sequence at end of %r" % (file_id,))
        escaped = file_id[i+1]
        if escaped == '_':
            ret.append("_")
        elif escaped == 's':
            ret.append(" ")
        else:
            raise AssertionError("unknown escape character %s" % escaped)
        # Skip both the underscore and the character it escapes.
        i += 2
    return "".join(ret)
 
63
 
 
64
 
 
65
def fix_person_identifier(text):
    """Return text in "name <address>" form.

    Text that already contains both "<" and ">" is returned unchanged;
    anything else is used as both the name and the address part.
    """
    has_brackets = "<" in text and ">" in text
    if not has_brackets:
        return "%s <%s>" % (text, text)
    return text
 
69
 
 
70
 
 
71
def warn_escaped(commit, num_escaped):
    """Warn that XML-invalid characters were escaped in a commit.

    :param commit: Identifier of the commit, interpolated into the message
    :param num_escaped: Number of characters that were escaped
    """
    message = ("Escaped %d XML-invalid characters in %s. Will be unable "
               "to regenerate the SHA map.")
    trace.warning(message, num_escaped, commit)
 
74
 
 
75
 
 
76
def warn_unusual_mode(commit, path, mode):
    """Warn that a path in a commit has an unusual file mode.

    :param commit: Identifier of the commit, interpolated into the message
    :param path: Path whose mode is unusual
    :param mode: The file mode; it is reported in octal
    """
    message = ("Unusual file mode %o for %s in %s. Will be unable to "
               "regenerate the SHA map.")
    trace.warning(message, mode, path, commit)
 
79
 
 
80
 
 
81
def squash_revision(target_repo, rev):
    """Remove characters that can't be stored from a revision, if necessary.

    Nothing is done when the serializer's "squashes_xml_invalid_characters"
    attribute is false; a missing attribute is treated as true. Otherwise
    the message, the 'author' property (when present) and the committer are
    escaped in that order, with a warning for each field that was changed.

    :param target_repo: Repository in which the revision will be stored
    :param rev: Revision object, will be modified in-place
    """
    serializer = target_repo._serializer
    if not getattr(serializer, "squashes_xml_invalid_characters", True):
        return
    from bzrlib.xml_serializer import escape_invalid_chars

    def report(count):
        # Only warn when something was actually escaped.
        if count:
            warn_escaped(rev.foreign_revid, count)

    rev.message, escaped = escape_invalid_chars(rev.message)
    report(escaped)
    if 'author' in rev.properties:
        rev.properties['author'], escaped = escape_invalid_chars(
            rev.properties['author'])
        report(escaped)
    rev.committer, escaped = escape_invalid_chars(rev.committer)
    report(escaped)
32
101
 
33
102
 
34
103
class BzrGitMapping(foreign.VcsMapping):
35
104
    """Class that maps between Git and Bazaar semantics."""
36
105
    experimental = False
37
106
 
38
 
    def revision_id_foreign_to_bzr(self, git_rev_id):
 
107
    def __init__(self):
 
108
        super(BzrGitMapping, self).__init__(foreign_git)
 
109
 
 
110
    def __eq__(self, other):
 
111
        return type(self) == type(other) and self.revid_prefix == other.revid_prefix
 
112
 
 
113
    @classmethod
 
114
    def revision_id_foreign_to_bzr(cls, git_rev_id):
39
115
        """Convert a git revision id handle to a Bazaar revision id."""
40
 
        return "%s:%s" % (self.revid_prefix, git_rev_id)
 
116
        return "%s:%s" % (cls.revid_prefix, git_rev_id)
41
117
 
42
 
    def revision_id_bzr_to_foreign(self, bzr_rev_id):
 
118
    @classmethod
 
119
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
43
120
        """Convert a Bazaar revision id to a git revision id handle."""
44
 
        if not bzr_rev_id.startswith("%s:" % self.revid_prefix):
45
 
            raise errors.InvalidRevisionId(bzr_rev_id, self)
46
 
        return bzr_rev_id[len(self.revid_prefix)+1:]
47
 
 
48
 
    def show_foreign_revid(self, foreign_revid):
49
 
        return { "git commit": foreign_revid }
 
121
        if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
 
122
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
 
123
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
50
124
 
51
125
    def generate_file_id(self, path):
 
126
        # Git paths are just bytestrings
 
127
        # We must just hope they are valid UTF-8..
52
128
        if path == "":
53
129
            return ROOT_ID
54
 
        return escape_file_id(path.encode('utf-8'))
 
130
        return escape_file_id(path)
 
131
 
 
132
    def parse_file_id(self, file_id):
 
133
        if file_id == ROOT_ID:
 
134
            return ""
 
135
        return unescape_file_id(file_id)
 
136
 
 
137
    def import_unusual_file_modes(self, rev, unusual_file_modes):
 
138
        if unusual_file_modes:
 
139
            rev.properties['file-modes'] = bencode.bencode(unusual_file_modes)
 
140
 
 
141
    def export_unusual_file_modes(self, rev):
 
142
        try:
 
143
            return bencode.bdecode(rev.properties['file-modes'])
 
144
        except KeyError:
 
145
            return {}
55
146
 
56
147
    def import_commit(self, commit):
57
148
        """Convert a git commit to a bzr revision.
66
157
        rev.committer = str(commit.committer).decode("utf-8", "replace")
67
158
        if commit.committer != commit.author:
68
159
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
160
 
 
161
        if commit.commit_time != commit.author_time:
 
162
            rev.properties['author-timestamp'] = str(commit.author_time)
 
163
        if commit.commit_timezone != commit.author_timezone:
 
164
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
69
165
        rev.timestamp = commit.commit_time
70
 
        rev.timezone = 0
 
166
        rev.timezone = commit.commit_timezone
71
167
        return rev
72
168
 
73
169
 
74
 
class BzrGitMappingExperimental(BzrGitMapping):
 
170
class BzrGitMappingv1(BzrGitMapping):
    """Git mapping whose revision ids carry the 'git-v1' prefix."""

    experimental = False
    revid_prefix = 'git-v1'

    def __str__(self):
        """Return the revision id prefix as the mapping's string form."""
        prefix = self.revid_prefix
        return prefix
 
176
 
 
177
 
 
178
class BzrGitMappingExperimental(BzrGitMappingv1):
    """Variant of the v1 mapping flagged experimental, with its own prefix."""

    experimental = True
    revid_prefix = 'git-experimental'
77
181
 
78
182
 
79
 
default_mapping = BzrGitMappingExperimental()
 
183
class GitMappingRegistry(VcsMappingRegistry):
    """Registry with available git mappings."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Parse a Bazaar revision id using the mapping named by its prefix.

        The text before the first ":" selects the registered mapping, and
        the whole revision id is then handed to that mapping's
        revision_id_bzr_to_foreign.

        :param bzr_revid: Bazaar revision id, e.g. "git-v1:<sha>"
        :return: Whatever the selected mapping's revision_id_bzr_to_foreign
            returns
        :raises InvalidRevisionId: If the id does not start with "git-",
            has no ":" separator, or names a mapping that is not registered
        """
        # A missing ":" would otherwise surface as a ValueError from the
        # tuple unpacking below, which callers that only expect
        # InvalidRevisionId do not handle.
        if not bzr_revid.startswith("git-") or ":" not in bzr_revid:
            raise errors.InvalidRevisionId(bzr_revid, None)
        (mapping_version, git_sha) = bzr_revid.split(":", 1)
        try:
            mapping = self.get(mapping_version)
        except KeyError:
            # Unknown mapping prefix: not a revision id we can interpret.
            raise errors.InvalidRevisionId(bzr_revid, None)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    # Alias: the same parser under a second name.
    parse_revision_id = revision_id_bzr_to_foreign
 
194
 
 
195
 
 
196
# Module-level registry of git mappings, keyed by revision id prefix.
mapping_registry = GitMappingRegistry()
# Each mapping is registered by module path and class name.
mapping_registry.register_lazy(
    'git-v1', "bzrlib.plugins.git.mapping", "BzrGitMappingv1")
mapping_registry.register_lazy(
    'git-experimental', "bzrlib.plugins.git.mapping",
    "BzrGitMappingExperimental")
 
201
 
 
202
 
 
203
class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    def __init__(self):
        """Initialise the foreign VCS with the module's git mapping registry."""
        super(ForeignGit, self).__init__(mapping_registry)

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Return a one-entry dict labelling foreign_revid as "git commit"."""
        shown = {"git commit": foreign_revid}
        return shown
 
212
 
 
213
 
 
214
# Module-level ForeignGit instance; BzrGitMapping.__init__ passes it to its
# base-class constructor.
foreign_git = ForeignGit()
 
215
# Default mapping instance (v1). It is created after foreign_git because
# BzrGitMapping.__init__ reads that global.
default_mapping = BzrGitMappingv1()
 
216
 
 
217
 
 
218
def text_to_blob(texts, entry):
    """Create a dulwich Blob holding the full text of an inventory entry.

    :param texts: Object providing get_record_stream(); the record for
        (entry.file_id, entry.revision) is requested from it
    :param entry: Inventory entry whose text is wanted
    :return: A Blob whose text is the record's fulltext
    """
    from dulwich.objects import Blob
    key = (entry.file_id, entry.revision)
    stream = texts.get_record_stream([key], 'unordered', True)
    # Exactly one key was requested, so only the first record is read.
    record = stream.next()
    fulltext = record.get_bytes_as('fulltext')
    blob = Blob()
    blob._text = fulltext
    return blob
 
224
 
 
225
 
 
226
def symlink_to_blob(entry):
    """Create a dulwich Blob whose text is the entry's symlink target.

    :param entry: Inventory entry providing symlink_target
    :return: A Blob containing the link target
    """
    from dulwich.objects import Blob
    link_blob = Blob()
    target = entry.symlink_target
    link_blob._text = target
    return link_blob
 
231
 
 
232
 
 
233
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Unix file mode
    :return: True if any of the user, group or other execute bits is set
    """
    # Same mask as octal 111, written with stat constants so it does not
    # rely on legacy bare-octal literal syntax.
    any_execute = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return bool(mode & any_execute)
 
236
 
 
237
 
 
238
def mode_kind(mode):
    """Determine the Bazaar inventory kind based on Unix file mode.

    :param mode: Unix file mode
    :return: One of 'directory', 'file', 'symlink' or 'tree-reference'
    :raises AssertionError: If the type bits of the mode are not recognised
    """
    # Bits 15-17 of the mode (octal mask 700000, divided by octal 100000).
    # Shift-and-mask gives the same integer as the old mask-and-divide,
    # without bare-octal literals or a dependence on "/" being integer
    # division.
    entry_kind = (mode >> 15) & 7
    if entry_kind == 0:
        return 'directory'
    if entry_kind != 1:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))
    # Bits 12-14 of the mode (octal mask 70000, divided by octal 10000).
    file_kind = (mode >> 12) & 7
    if file_kind == 0:
        return 'file'
    elif file_kind == 2:
        return 'symlink'
    elif file_kind == 6:
        return 'tree-reference'
    raise AssertionError(
        "Unknown file kind %d, perms=%o." % (file_kind, mode,))
 
257
 
 
258
 
 
259
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Inventory entry; its kind is read, and for files also
        its executable flag
    :return: stat.S_IFDIR for directories, stat.S_IFLNK for symlinks, and
        for files a regular-file mode with rw-r--r-- permissions plus all
        execute bits when the entry is executable
    :raises AssertionError: If the entry kind is not one of the above
    """
    kind = entry.kind
    if kind == 'directory':
        return stat.S_IFDIR
    if kind == 'symlink':
        return stat.S_IFLNK
    if kind == 'file':
        # Equivalent to S_IFREG | 0644 in octal.
        mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IROTH)
        if entry.executable:
            # Equivalent to octal 111: execute for user, group and other.
            mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        return mode
    # Name the offending kind; the bare AssertionError gave no context.
    raise AssertionError("Unsupported inventory entry kind %r" % (kind,))
 
272
 
 
273
 
 
274
def directory_to_tree(entry, lookup_ie_sha1):
    """Build a dulwich Tree from the children of a directory entry.

    :param entry: Directory inventory entry; its children mapping is read
    :param lookup_ie_sha1: Function returning the sha for a child entry
    :return: The serialized Tree, with children added in sorted name order
    """
    from dulwich.objects import Tree
    result = Tree()
    children = entry.children
    names = list(children.keys())
    names.sort()
    for name in names:
        child = children[name]
        result.add(entry_mode(child), name.encode("utf-8"),
                   lookup_ie_sha1(child))
    result.serialize()
    return result
 
282
 
 
283
 
 
284
def extract_unusual_modes(rev):
    """Return the unusual file modes recorded for a revision.

    :param rev: Revision whose revision_id is parsed via mapping_registry
    :return: An empty dict when the revision id is not valid for the
        registry; otherwise the result of the mapping's
        export_unusual_file_modes(rev)
    """
    try:
        parsed = mapping_registry.parse_revision_id(rev.revision_id)
    except errors.InvalidRevisionId:
        return {}
    foreign_revid, mapping = parsed
    return mapping.export_unusual_file_modes(rev)
 
291
 
 
292
 
 
293
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
    """Convert a Bazaar tree to a Git tree.

    Walks inventory.iter_entries(), yielding a blob for every file and
    symlink as it is met, and a tree for every directory once an entry
    outside that directory is reached (or the walk ends).

    :param inventory: Inventory to convert
    :param texts: Passed to text_to_blob to fetch file contents
    :param mapping: Not used by this function
    :param cur: Initial current path; defaults to ""
    :return: Yields tuples with object sha1, object and path
    :raises AssertionError: For an entry that is not a directory, file
        or symlink
    """
    from dulwich.objects import Tree
    import stat
    if cur is None:
        cur = ""
    # Trees that have been started but not finished, paired with their path.
    pending = []
    tree = Tree()

    for path, entry in inventory.iter_entries():
        while pending and not path.startswith(osutils.pathjoin(cur, "")):
            # path lies outside the current directory: finish that tree and
            # attach it to its parent.
            tree.serialize()
            finished_sha = tree.id
            yield finished_sha, tree, cur.encode("utf-8")
            finished = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'),
                        finished_sha)
            cur, tree = pending.pop()
            tree.add(*finished)

        if entry.kind == "directory":
            pending.append((cur, tree))
            cur = path
            tree = Tree()
            continue

        if entry.kind == "file":
            blob = text_to_blob(texts, entry)
        elif entry.kind == "symlink":
            blob = symlink_to_blob(entry)
        else:
            raise AssertionError("Unknown kind %s" % entry.kind)
        blob_sha = blob.id
        yield blob_sha, blob, path.encode("utf-8")
        leaf_name = urlutils.basename(path).encode("utf-8")
        tree.add(entry_mode(entry), leaf_name, blob_sha)

    # Unwind the directories still open, stopping while one entry remains on
    # the stack.
    while len(pending) > 1:
        tree.serialize()
        finished_sha = tree.id
        yield finished_sha, tree, cur.encode("utf-8")
        finished = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'),
                    finished_sha)
        cur, tree = pending.pop()
        tree.add(*finished)

    tree.serialize()
    yield tree.id, tree, cur.encode("utf-8")
 
343
 
 
344
 
 
345
def revision_to_commit(rev, tree_sha, parent_lookup):
    """Turn a Bazaar revision into a Git commit.

    :param rev: Bazaar revision to convert
    :param tree_sha: Tree sha for the commit
    :param parent_lookup: Function for looking up the Git sha equivalent of
        a Bazaar revision id; parents for which it returns None are left out
    :return: dulwich.objects.Commit representing the revision
    """
    from dulwich.objects import Commit
    commit = Commit()
    commit.tree = tree_sha
    for parent_revid in rev.parent_ids:
        parent_sha = parent_lookup(parent_revid)
        if parent_sha is None:
            continue
        assert len(parent_sha) == 40, "unexpected length for %r" % parent_sha
        commit.parents.append(parent_sha)
    commit.message = rev.message.encode("utf-8")
    committer = rev.committer.encode("utf-8")
    commit.committer = fix_person_identifier(committer)
    author = rev.get_apparent_authors()[0].encode("utf-8")
    commit.author = fix_person_identifier(author)
    commit.commit_time = long(rev.timestamp)
    # Author time and timezone fall back to the commit's own values when the
    # revision carries no explicit author properties.
    if 'author-timestamp' not in rev.properties:
        commit.author_time = commit.commit_time
    else:
        commit.author_time = long(rev.properties['author-timestamp'])
    commit.commit_timezone = rev.timezone
    if 'author-timezone' not in rev.properties:
        commit.author_timezone = commit.commit_timezone
    else:
        commit.author_timezone = int(rev.properties['author-timezone'])
    return commit