/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Allow paranoia checking with -Dverify.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
from bzrlib import (
 
22
    errors,
 
23
    foreign,
 
24
    urlutils,
 
25
    )
 
26
from bzrlib.inventory import (
 
27
    ROOT_ID,
 
28
    )
21
29
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
30
    ForeignVcs, 
 
31
    VcsMappingRegistry, 
 
32
    ForeignRevision,
 
33
    )
 
34
from bzrlib.xml_serializer import (
 
35
    escape_invalid_chars,
 
36
    )
 
37
 
 
38
DEFAULT_TREE_MODE = 0040000
 
39
DEFAULT_FILE_MODE = 0100644
 
40
DEFAULT_SYMLINK_MODE = 0120000
 
41
 
26
42
 
27
43
def escape_file_id(file_id):
28
44
    return file_id.replace('_', '__').replace(' ', '_s')
55
71
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
72
 
57
73
    def generate_file_id(self, path):
 
74
        # Git paths are just bytestrings
 
75
        # We just have to hope they are valid UTF-8.
 
76
        assert isinstance(path, str)
58
77
        if path == "":
59
78
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
79
        return escape_file_id(path)
 
80
 
 
81
    def parse_file_id(self, file_id):
 
82
        if file_id == ROOT_ID:
 
83
            return ""
 
84
        return unescape_file_id(file_id)
61
85
 
62
86
    def import_commit(self, commit):
63
87
        """Convert a git commit to a bzr revision.
68
92
            raise AssertionError("Commit object can't be None")
69
93
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
94
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
95
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
96
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
97
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
98
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
99
 
 
100
        if commit.commit_time != commit.author_time:
 
101
            rev.properties['author-timestamp'] = str(commit.author_time)
75
102
        rev.timestamp = commit.commit_time
76
103
        rev.timezone = 0
77
104
        return rev
121
148
default_mapping = BzrGitMappingv1()
122
149
 
123
150
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
 
151
def text_to_blob(text):
 
152
    from dulwich.objects import Blob
 
153
    blob = Blob()
 
154
    blob._text = text
 
155
    return blob
 
156
 
 
157
 
 
158
def symlink_to_blob(entry):
 
159
    from dulwich.objects import Blob
 
160
    blob = Blob()
 
161
    blob._text = entry.symlink_target
 
162
    return blob
 
163
 
 
164
 
 
165
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
166
    """Convert a Bazaar tree to a Git tree.
 
167
 
 
168
    :return: Yields tuples with object sha1, object and path
 
169
    """
 
170
    from dulwich.objects import Tree
126
171
    from bzrlib.inventory import InventoryDirectory, InventoryFile
127
172
    import stat
128
173
    stack = []
129
 
    cur = ""
 
174
    if cur is None:
 
175
        cur = ""
130
176
    tree = Tree()
131
177
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
178
    # stack contains the set of trees that we haven't 
135
179
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
 
180
    for path, entry in inventory.iter_entries():
138
181
        while stack and not path.startswith(cur):
139
182
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
 
183
            sha = tree.id
141
184
            yield sha, tree, cur
142
185
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
186
            cur, tree = stack.pop()
144
187
            tree.add(*t)
145
188
 
146
 
        if type(entry) == InventoryDirectory:
 
189
        if entry.kind == "directory":
147
190
            stack.append((cur, tree))
148
191
            cur = path
149
192
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
 
193
        elif entry.kind == "file":
152
194
            #FIXME: We could potentially make this lazy to avoid SHA-ing lots of stuff
153
195
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
 
196
            text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
 
197
            blob = text_to_blob(text)
 
198
            sha = blob.id
157
199
            yield sha, blob, path
158
200
 
159
201
            name = urlutils.basename(path).encode("utf-8")
161
203
            if entry.executable:
162
204
                mode |= 0111
163
205
            tree.add(mode, name, sha)
 
206
        elif entry.kind == "symlink":
 
207
            blob = symlink_to_blob(entry)
 
208
            sha = blob.id
 
209
            yield sha, blob, path
 
210
            name = urlutils.basename(path).encode("utf-8")
 
211
            tree.add(stat.S_IFLNK, name, sha)
 
212
        else:
 
213
            raise AssertionError("Unknown kind %s" % entry.kind)
164
214
 
165
215
    while len(stack) > 1:
166
216
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
 
217
        sha = tree.id
168
218
        yield sha, tree, cur
169
219
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
220
        cur, tree = stack.pop()
171
221
        tree.add(*t)
172
222
 
173
223
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
224
    yield tree.id, tree, cur
175
225
 
176
226
 
177
227
def revision_to_commit(rev, tree_sha, parent_lookup):
187
237
    for p in rev.parent_ids:
188
238
        git_p = parent_lookup(p)
189
239
        if git_p is not None:
 
240
            assert len(git_p) == 40, "unexpected length for %r" % git_p
190
241
            commit._parents.append(git_p)
191
242
    commit._message = rev.message.encode("utf-8")
192
243
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
 
244
    commit._author = rev.get_apparent_authors()[0].encode("utf-8")
194
245
    commit._commit_time = long(rev.timestamp)
 
246
    if 'author-timestamp' in rev.properties:
 
247
        commit._author_time = long(rev.properties['author-timestamp'])
 
248
    else:
 
249
        commit._author_time = commit._commit_time
195
250
    commit.serialize()
196
251
    return commit