/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Fix branch cloning.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
# Copyright (C) 2007 Canonical Ltd
2
 
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
3
 
# Copyright (C) 2008 John Carr
4
2
#
5
3
# This program is free software; you can redistribute it and/or modify
6
4
# it under the terms of the GNU General Public License as published by
18
16
 
19
17
"""Converters, etc for going between Bazaar and Git ids."""
20
18
 
21
 
import stat
22
 
 
23
 
from bzrlib import (
24
 
    errors,
25
 
    foreign,
26
 
    osutils,
27
 
    trace,
28
 
    urlutils,
29
 
    )
30
 
try:
31
 
    from bzrlib import bencode
32
 
except ImportError:
33
 
    from bzrlib.util import bencode
34
 
from bzrlib.inventory import (
35
 
    ROOT_ID,
36
 
    )
 
19
from bzrlib import errors, foreign
 
20
from bzrlib.inventory import ROOT_ID
37
21
from bzrlib.foreign import (
38
 
    ForeignVcs, 
39
 
    VcsMappingRegistry, 
40
 
    ForeignRevision,
41
 
    )
42
 
 
43
 
# Default git mode for a regular, non-executable file (rw-r--r--).  Spelled
# with ``stat`` constants because a bare ``0644`` literal is only valid
# syntax on Python 2.
DEFAULT_FILE_MODE = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                     stat.S_IRGRP | stat.S_IROTH)
 
22
        ForeignRevision,
 
23
        )
44
24
 
45
25
 
46
26
def escape_file_id(file_id):
48
28
 
49
29
 
50
30
def unescape_file_id(file_id):
    """Decode the underscore escapes in a file id.

    ``__`` decodes to ``_`` and ``_s`` decodes to a space; every other
    character is copied through unchanged.

    :param file_id: Escaped file id
    :return: The unescaped string
    :raises AssertionError: if an underscore is followed by an unknown
        escape character, or is the last character of the input
    """
    ret = []
    length = len(file_id)
    i = 0
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
            i += 1
            continue
        # An underscore always introduces a two-character escape; without
        # this check a trailing underscore would index past the end.
        if i + 1 >= length:
            raise AssertionError(
                "truncated escape sequence at end of %r" % (file_id,))
        escape = file_id[i+1]
        if escape == '_':
            ret.append("_")
        elif escape == 's':
            ret.append(" ")
        else:
            raise AssertionError("unknown escape character %s" % escape)
        # Skip both the underscore and the escape character.
        i += 2
    return "".join(ret)
66
 
 
67
 
 
68
 
def fix_person_identifier(text):
    """Make sure a person identifier carries a ``<...>`` part.

    :param text: Person identifier
    :return: ``text`` unchanged when it contains both ``<`` and ``>``;
        otherwise ``text``, a space, and ``text`` again in angle brackets
    """
    has_brackets = "<" in text and ">" in text
    if not has_brackets:
        return "%s <%s>" % (text, text)
    return text
72
 
 
73
 
 
74
 
def warn_escaped(commit, num_escaped):
    """Warn that XML-invalid characters were escaped.

    :param commit: Identifier of the affected commit, shown in the message
    :param num_escaped: Number of characters that were escaped
    """
    message = ("Escaped %d XML-invalid characters in %s. Will be unable "
               "to regenerate the SHA map.")
    trace.warning(message, num_escaped, commit)
77
 
 
78
 
 
79
 
def warn_unusual_mode(commit, path, mode):
    """Log (via ``trace.mutter``) that a file has an unusual mode.

    :param commit: Identifier of the commit containing the file
    :param path: Path of the file
    :param mode: File mode, formatted as octal in the message
    """
    message = "Unusual file mode %o for %s in %s. Storing as revision property. "
    trace.mutter(message, mode, path, commit)
82
 
 
83
 
 
84
 
def squash_revision(target_repo, rev):
    """Escape XML-invalid characters in a revision, if necessary.

    The revision message, the ``author`` property (when present) and the
    committer are passed through ``escape_invalid_chars``, in that order,
    and a warning is issued for each field in which something was escaped.

    :param target_repo: Repository in which the revision will be stored
    :param rev: Revision object, will be modified in-place
    """
    serializer = target_repo._serializer
    # A serializer without the attribute is treated as one that squashes.
    if not getattr(serializer, "squashes_xml_invalid_characters", True):
        return
    from bzrlib.xml_serializer import escape_invalid_chars

    def _report(count):
        # Only warn when at least one character was actually escaped.
        if count:
            warn_escaped(rev.foreign_revid, count)

    rev.message, escaped = escape_invalid_chars(rev.message)
    _report(escaped)

    props = rev.properties
    if 'author' in props:
        props['author'], escaped = escape_invalid_chars(props['author'])
        _report(escaped)

    rev.committer, escaped = escape_invalid_chars(rev.committer)
    _report(escaped)
 
31
    return file_id.replace("_s", " ").replace("__", "_")
104
32
 
105
33
 
106
34
class BzrGitMapping(foreign.VcsMapping):
107
35
    """Class that maps between Git and Bazaar semantics."""
108
36
    experimental = False
109
37
 
110
 
    def __init__(self):
111
 
        super(BzrGitMapping, self).__init__(foreign_git)
112
 
 
113
 
    def __eq__(self, other):
114
 
        return type(self) == type(other) and self.revid_prefix == other.revid_prefix
115
 
 
116
 
    @classmethod
117
 
    def revision_id_foreign_to_bzr(cls, git_rev_id):
 
38
    def revision_id_foreign_to_bzr(self, git_rev_id):
118
39
        """Convert a git revision id handle to a Bazaar revision id."""
119
 
        return "%s:%s" % (cls.revid_prefix, git_rev_id)
 
40
        return "%s:%s" % (self.revid_prefix, git_rev_id)
120
41
 
121
 
    @classmethod
122
 
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
 
42
    def revision_id_bzr_to_foreign(self, bzr_rev_id):
123
43
        """Convert a Bazaar revision id to a git revision id handle."""
124
 
        if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
125
 
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
126
 
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
 
44
        if not bzr_rev_id.startswith("%s:" % self.revid_prefix):
 
45
            raise errors.InvalidRevisionId(bzr_rev_id, self)
 
46
        return bzr_rev_id[len(self.revid_prefix)+1:]
 
47
 
 
48
    def show_foreign_revid(self, foreign_revid):
 
49
        return { "git commit": foreign_revid }
127
50
 
128
51
    def generate_file_id(self, path):
129
 
        # Git paths are just bytestrings
130
 
        # We must just hope they are valid UTF-8..
131
52
        if path == "":
132
53
            return ROOT_ID
133
 
        return escape_file_id(path)
134
 
 
135
 
    def parse_file_id(self, file_id):
136
 
        if file_id == ROOT_ID:
137
 
            return ""
138
 
        return unescape_file_id(file_id)
139
 
 
140
 
    def import_unusual_file_modes(self, rev, unusual_file_modes):
141
 
        if unusual_file_modes:
142
 
            ret = [(name, unusual_file_modes[name]) for name in sorted(unusual_file_modes.keys())]
143
 
            rev.properties['file-modes'] = bencode.bencode(ret)
144
 
 
145
 
    def export_unusual_file_modes(self, rev):
146
 
        try:
147
 
            return dict([(self.generate_file_id(path), mode) for (path, mode) in bencode.bdecode(rev.properties['file-modes'])])
148
 
        except KeyError:
149
 
            return {}
 
54
        return escape_file_id(path.encode('utf-8'))
150
55
 
151
56
    def import_commit(self, commit):
152
57
        """Convert a git commit to a bzr revision.
161
66
        rev.committer = str(commit.committer).decode("utf-8", "replace")
162
67
        if commit.committer != commit.author:
163
68
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
164
 
 
165
 
        if commit.commit_time != commit.author_time:
166
 
            rev.properties['author-timestamp'] = str(commit.author_time)
167
 
        if commit.commit_timezone != commit.author_timezone:
168
 
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
169
69
        rev.timestamp = commit.commit_time
170
 
        rev.timezone = commit.commit_timezone
 
70
        rev.timezone = 0
171
71
        return rev
172
72
 
173
73
 
174
 
class BzrGitMappingv1(BzrGitMapping):
    """Git mapping whose revision ids use the ``git-v1`` prefix."""

    # Not an experimental mapping.
    experimental = False
    # Prefix placed before the ``:`` in Bazaar revision ids of this mapping.
    revid_prefix = 'git-v1'

    def __str__(self):
        """Return the revision id prefix as the mapping's string form."""
        prefix = self.revid_prefix
        return prefix
180
 
 
181
 
 
182
 
class BzrGitMappingExperimental(BzrGitMappingv1):
 
74
class BzrGitMappingExperimental(BzrGitMapping):
183
75
    revid_prefix = 'git-experimental'
184
76
    experimental = True
185
77
 
186
78
 
187
 
class GitMappingRegistry(VcsMappingRegistry):
    """Registry with available git mappings."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Parse a Bazaar revision id using the mapping named in its prefix.

        The text before the first ``:`` is used as the registry key to look
        up the mapping, which is then asked to parse the full revision id.

        :param bzr_revid: Bazaar revision id
        :return: Whatever the selected mapping's
            ``revision_id_bzr_to_foreign`` returns for ``bzr_revid``
        :raises errors.InvalidRevisionId: if ``bzr_revid`` does not start
            with ``git-`` or contains no ``:`` separator
        """
        if not bzr_revid.startswith("git-"):
            raise errors.InvalidRevisionId(bzr_revid, None)
        # Without a separator there is no mapping name to look up; report
        # it as an invalid revision id instead of failing on unpacking.
        if ":" not in bzr_revid:
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version = bzr_revid.split(":", 1)[0]
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    # Alias: the same function is exposed under both names.
    parse_revision_id = revision_id_bzr_to_foreign
198
 
 
199
 
 
200
 
# Module-wide registry of git mappings.  The mapping classes are registered
# lazily, by module path and class name.
mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy(
    'git-v1', "bzrlib.plugins.git.mapping", "BzrGitMappingv1")
mapping_registry.register_lazy(
    'git-experimental', "bzrlib.plugins.git.mapping",
    "BzrGitMappingExperimental")
205
 
 
206
 
 
207
 
class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    def __init__(self):
        """Initialise the foreign VCS with the module's mapping registry."""
        super(ForeignGit, self).__init__(mapping_registry)

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Return a dictionary describing a foreign revision id.

        :param foreign_revid: Git revision identifier
        :return: Dictionary with the single key ``"git commit"``
        """
        description = {"git commit": foreign_revid}
        return description
216
 
 
217
 
 
218
 
# Shared module-level instances.  ``foreign_git`` must be bound first:
# BzrGitMapping.__init__ reads it when ``default_mapping`` is constructed.
foreign_git = ForeignGit()
default_mapping = BzrGitMappingv1()
220
 
 
221
 
 
222
 
def text_to_blob(texts, entry):
    """Build a git blob holding the full text of an inventory entry.

    :param texts: Object providing ``get_record_stream``
    :param entry: Inventory entry; its ``file_id`` and ``revision`` form
        the key that is requested from ``texts``
    :return: A ``dulwich.objects.Blob`` whose ``_text`` is the full text
    """
    from dulwich.objects import Blob
    key = (entry.file_id, entry.revision)
    # Only one key is requested, so only the first record is read.
    stream = texts.get_record_stream([key], 'unordered', True)
    fulltext = stream.next().get_bytes_as('fulltext')
    result = Blob()
    result._text = fulltext
    return result
228
 
 
229
 
 
230
 
def symlink_to_blob(entry):
    """Build a git blob whose content is a symlink's target.

    :param entry: Inventory entry providing ``symlink_target``
    :return: A ``dulwich.objects.Blob`` whose ``_text`` is the target
    """
    from dulwich.objects import Blob
    result = Blob()
    result._text = entry.symlink_target
    return result
235
 
 
236
 
 
237
 
def mode_is_executable(mode):
    """Check if mode should be considered executable.

    :param mode: Unix file mode
    :return: True if any of the owner, group or other execute bits is set
    """
    # Same mask as octal 0111, spelled with ``stat`` constants because the
    # bare ``0111`` literal is only valid syntax on Python 2.
    execute_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return bool(mode & execute_bits)
240
 
 
241
 
 
242
 
def mode_kind(mode):
    """Determine the Bazaar inventory kind based on Unix file mode.

    :param mode: Unix file mode
    :return: One of ``'directory'``, ``'file'``, ``'symlink'`` or
        ``'tree-reference'``
    :raises AssertionError: if the type bits of ``mode`` match none of
        the known kinds
    """
    # The two three-bit fields are octal 0700000 and 070000.  They are
    # extracted with shifts so the result is an integer on every Python
    # version and no Python-2-only octal literals are needed.
    entry_kind = (mode >> 15) & 7
    if entry_kind == 0:
        return 'directory'
    if entry_kind != 1:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))
    file_kind = (mode >> 12) & 7
    if file_kind == 0:
        return 'file'
    elif file_kind == 2:
        return 'symlink'
    elif file_kind == 6:
        return 'tree-reference'
    raise AssertionError(
        "Unknown file kind %d, perms=%o." % (file_kind, mode,))
261
 
 
262
 
 
263
 
def entry_mode(entry):
    """Determine the git file mode for an inventory entry.

    :param entry: Object with a ``kind`` attribute and, for files, an
        ``executable`` attribute
    :return: ``stat.S_IFDIR`` for directories, ``stat.S_IFLNK`` for
        symlinks, and a regular-file mode with permissions 644 (755 when
        executable) for files
    :raises AssertionError: if ``entry.kind`` is none of the above
    """
    kind = entry.kind
    if kind == 'directory':
        return stat.S_IFDIR
    if kind == 'symlink':
        return stat.S_IFLNK
    if kind == 'file':
        # rw-r--r--, spelled with ``stat`` constants because bare octal
        # literals such as ``0644`` are only valid syntax on Python 2.
        mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IROTH)
        if entry.executable:
            mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        return mode
    raise AssertionError("Unknown kind %r" % (kind,))
276
 
 
277
 
 
278
 
def directory_to_tree(entry, lookup_ie_sha1, unusual_modes):
    """Build a git tree object for one directory inventory entry.

    :param entry: Directory entry whose ``children`` maps names to entries
    :param lookup_ie_sha1: Callable returning the sha for a child entry
    :param unusual_modes: Mapping from file id to an explicit mode; children
        that are not in it get the mode computed by ``entry_mode``
    :return: The serialized ``dulwich.objects.Tree``
    """
    from dulwich.objects import Tree
    result = Tree()
    children = entry.children
    # Children are added in sorted name order.
    for child_name in sorted(children.keys()):
        child = children[child_name]
        try:
            child_mode = unusual_modes[child.file_id]
        except KeyError:
            child_mode = entry_mode(child)
        child_sha = lookup_ie_sha1(child)
        result.add(child_mode, child_name.encode("utf-8"), child_sha)
    result.serialize()
    return result
290
 
 
291
 
 
292
 
def extract_unusual_modes(rev):
    """Return the unusual file modes recorded for a revision.

    :param rev: Revision whose ``revision_id`` is parsed through
        ``mapping_registry``
    :return: The result of the mapping's ``export_unusual_file_modes`` for
        ``rev``, or an empty dictionary when the revision id is rejected
        with ``InvalidRevisionId``
    """
    try:
        parsed = mapping_registry.parse_revision_id(rev.revision_id)
    except errors.InvalidRevisionId:
        return {}
    foreign_revid, mapping = parsed
    return mapping.export_unusual_file_modes(rev)
299
 
 
300
 
 
301
 
def inventory_to_tree_and_blobs(inventory, texts, mapping, unusual_modes, cur=None):
    """Convert a Bazaar tree to a Git tree.

    Walks ``inventory.iter_entries()``.  A blob is yielded for every file
    and symlink as it is visited; a tree is yielded for a directory once an
    entry outside of it is reached (or the walk ends).

    :param inventory: Inventory providing ``iter_entries()``
    :param texts: Passed on to ``text_to_blob`` for file entries
    :param mapping: Not used by this function
    :param unusual_modes: Mapping consulted with the UTF-8 encoded path to
        override the default mode of an entry
    :param cur: Initial current path; ``None`` means ``""``
    :return: Yields tuples with object sha1, object and path
    :raises AssertionError: for an entry that is not a directory, file or
        symlink
    """
    # NOTE(review): directory_to_tree looks ``unusual_modes`` up by file id
    # while this function uses the encoded path -- confirm which key the
    # callers actually provide.
    from dulwich.objects import Tree
    import stat

    def _parent_item(dir_path, dir_sha):
        # (mode, name, sha) triple under which a finished directory tree is
        # added to its parent tree.
        dir_mode = unusual_modes.get(dir_path.encode("utf-8"), stat.S_IFDIR)
        return (dir_mode, urlutils.basename(dir_path).encode('UTF-8'), dir_sha)

    # ``pending`` holds the (path, tree) pairs of the directories that are
    # still being filled.
    pending = []
    if cur is None:
        cur = ""
    tree = Tree()

    for path, entry in inventory.iter_entries():
        while pending and not path.startswith(osutils.pathjoin(cur, "")):
            # ``path`` is not below the current directory: finish that
            # directory's tree and attach it to its parent.
            tree.serialize()
            sha = tree.id
            yield sha, tree, cur.encode("utf-8")
            item = _parent_item(cur, sha)
            cur, tree = pending.pop()
            tree.add(*item)

        if entry.kind == "directory":
            # Descend: remember the parent and start a fresh tree.
            pending.append((cur, tree))
            cur = path
            tree = Tree()
            continue

        if entry.kind == "file":
            blob = text_to_blob(texts, entry)
        elif entry.kind == "symlink":
            blob = symlink_to_blob(entry)
        else:
            raise AssertionError("Unknown kind %s" % entry.kind)
        sha = blob.id
        yield sha, blob, path.encode("utf-8")
        name = urlutils.basename(path).encode("utf-8")
        mode = unusual_modes.get(path.encode("utf-8"), entry_mode(entry))
        tree.add(mode, name, sha)

    # Unwind the directories that are still open, leaving the last pending
    # pair in place; the tree that remains current is yielded below.
    while len(pending) > 1:
        tree.serialize()
        sha = tree.id
        yield sha, tree, cur.encode("utf-8")
        item = _parent_item(cur, sha)
        cur, tree = pending.pop()
        tree.add(*item)

    tree.serialize()
    yield tree.id, tree, cur.encode("utf-8")
354
 
 
355
 
 
356
 
def revision_to_commit(rev, tree_sha, parent_lookup):
    """Turn a Bazaar revision into a Git commit.

    :param rev: Bazaar revision to convert
    :param tree_sha: Tree sha for the commit
    :param parent_lookup: Function for looking up the Git sha equivalent of
        a bzr revision id; parents for which it returns None are left out
    :return: ``dulwich.objects.Commit`` representing the revision
    """
    from dulwich.objects import Commit
    commit = Commit()
    commit.tree = tree_sha

    for parent_id in rev.parent_ids:
        parent_sha = parent_lookup(parent_id)
        if parent_sha is None:
            continue
        assert len(parent_sha) == 40, "unexpected length for %r" % parent_sha
        commit.parents.append(parent_sha)

    commit.message = rev.message.encode("utf-8")
    commit.committer = fix_person_identifier(rev.committer.encode("utf-8"))
    first_author = rev.get_apparent_authors()[0]
    commit.author = fix_person_identifier(first_author.encode("utf-8"))

    props = rev.properties
    # Author time and timezone fall back to the commit's own values when the
    # revision carries no explicit author-* property.
    commit.commit_time = long(rev.timestamp)
    if 'author-timestamp' not in props:
        commit.author_time = commit.commit_time
    else:
        commit.author_time = long(props['author-timestamp'])

    commit.commit_timezone = rev.timezone
    if 'author-timezone' not in props:
        commit.author_timezone = commit.commit_timezone
    else:
        commit.author_timezone = int(props['author-timezone'])
    return commit
 
79
default_mapping = BzrGitMappingExperimental()