/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Share sha map cache connections inside threads.

Show diffs side-by-side

added

removed

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    urlutils,
 
28
    )
 
29
from bzrlib.inventory import (
 
30
    ROOT_ID,
 
31
    )
21
32
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
33
    ForeignVcs, 
 
34
    VcsMappingRegistry, 
 
35
    ForeignRevision,
 
36
    )
 
37
from bzrlib.xml_serializer import (
 
38
    escape_invalid_chars,
 
39
    )
 
40
 
 
41
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
 
42
 
26
43
 
27
44
def escape_file_id(file_id):
28
45
    return file_id.replace('_', '__').replace(' ', '_s')
55
72
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
73
 
57
74
    def generate_file_id(self, path):
 
75
        # Git paths are just bytestrings
 
76
        # We must just hope they are valid UTF-8..
 
77
        assert isinstance(path, str)
58
78
        if path == "":
59
79
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
80
        return escape_file_id(path)
 
81
 
 
82
    def parse_file_id(self, file_id):
 
83
        if file_id == ROOT_ID:
 
84
            return ""
 
85
        return unescape_file_id(file_id)
61
86
 
62
87
    def import_commit(self, commit):
63
88
        """Convert a git commit to a bzr revision.
68
93
            raise AssertionError("Commit object can't be None")
69
94
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
95
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
96
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
97
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
98
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
99
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
100
 
 
101
        if commit.commit_time != commit.author_time:
 
102
            rev.properties['author-timestamp'] = str(commit.author_time)
 
103
        if commit.commit_timezone != commit.author_timezone:
 
104
            rev.properties['author-timezone'] = str(commit.author_timezone)
75
105
        rev.timestamp = commit.commit_time
76
 
        rev.timezone = 0
 
106
        rev.timezone = commit.commit_timezone
77
107
        return rev
78
108
 
79
109
 
121
151
default_mapping = BzrGitMappingv1()
122
152
 
123
153
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
126
 
    from bzrlib.inventory import InventoryDirectory, InventoryFile
 
154
def text_to_blob(texts, entry):
 
155
    from dulwich.objects import Blob
 
156
    text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
 
157
    blob = Blob()
 
158
    blob._text = text
 
159
    return blob
 
160
 
 
161
 
 
162
def symlink_to_blob(entry):
 
163
    from dulwich.objects import Blob
 
164
    blob = Blob()
 
165
    blob._text = entry.symlink_target
 
166
    return blob
 
167
 
 
168
 
 
169
def entry_mode(entry):
 
170
    if entry.kind == 'directory':
 
171
        return stat.S_IFDIR
 
172
    elif entry.kind == 'symlink':
 
173
        return stat.S_IFLNK
 
174
    elif entry.kind == 'file':
 
175
        mode = stat.S_IFREG | 0644
 
176
        if entry.executable:
 
177
            mode |= 0111
 
178
        return mode
 
179
    else:
 
180
        raise AssertionError
 
181
 
 
182
 
 
183
def directory_to_tree(entry, lookup_ie_sha1):
 
184
    from dulwich.objects import Tree
 
185
    tree = Tree()
 
186
    for name in sorted(entry.children.keys()):
 
187
        ie = entry.children[name]
 
188
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
 
189
    tree.serialize()
 
190
    return tree
 
191
 
 
192
 
 
193
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
194
    """Convert a Bazaar tree to a Git tree.
 
195
 
 
196
    :return: Yields tuples with object sha1, object and path
 
197
    """
 
198
    from dulwich.objects import Tree
127
199
    import stat
128
200
    stack = []
129
 
    cur = ""
 
201
    if cur is None:
 
202
        cur = ""
130
203
    tree = Tree()
131
204
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
205
    # stack contains the set of trees that we haven't 
135
206
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
138
 
        while stack and not path.startswith(cur):
 
207
    for path, entry in inventory.iter_entries():
 
208
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
 
209
            # We've hit a file that's not a child of the previous path
139
210
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
141
 
            yield sha, tree, cur
 
211
            sha = tree.id
 
212
            yield sha, tree, cur.encode("utf-8")
142
213
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
214
            cur, tree = stack.pop()
144
215
            tree.add(*t)
145
216
 
146
 
        if type(entry) == InventoryDirectory:
 
217
        if entry.kind == "directory":
147
218
            stack.append((cur, tree))
148
219
            cur = path
149
220
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
152
 
            #FIXME: We can potentially make this lazy to avoid sha-ing lots of stuff
153
 
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
157
 
            yield sha, blob, path
158
 
 
 
221
        else:
 
222
            if entry.kind == "file":
 
223
                blob = text_to_blob(texts, entry)
 
224
            elif entry.kind == "symlink":
 
225
                blob = symlink_to_blob(entry)
 
226
            else:
 
227
                raise AssertionError("Unknown kind %s" % entry.kind)
 
228
            sha = blob.id
 
229
            yield sha, blob, path.encode("utf-8")
159
230
            name = urlutils.basename(path).encode("utf-8")
160
 
            mode = stat.S_IFREG | 0644
161
 
            if entry.executable:
162
 
                mode |= 0111
163
 
            tree.add(mode, name, sha)
 
231
            tree.add(entry_mode(entry), name, sha)
164
232
 
165
233
    while len(stack) > 1:
166
234
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
168
 
        yield sha, tree, cur
 
235
        sha = tree.id
 
236
        yield sha, tree, cur.encode("utf-8")
169
237
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
238
        cur, tree = stack.pop()
171
239
        tree.add(*t)
172
240
 
173
241
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
242
    yield tree.id, tree, cur.encode("utf-8")
175
243
 
176
244
 
177
245
def revision_to_commit(rev, tree_sha, parent_lookup):
187
255
    for p in rev.parent_ids:
188
256
        git_p = parent_lookup(p)
189
257
        if git_p is not None:
 
258
            assert len(git_p) == 40, "unexpected length for %r" % git_p
190
259
            commit._parents.append(git_p)
191
260
    commit._message = rev.message.encode("utf-8")
192
261
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
 
262
    commit._author = rev.get_apparent_authors()[0].encode("utf-8")
194
263
    commit._commit_time = long(rev.timestamp)
 
264
    if 'author-timestamp' in rev.properties:
 
265
        commit._author_time = long(rev.properties['author-timestamp'])
 
266
    else:
 
267
        commit._author_time = commit._commit_time
 
268
    commit._commit_timezone = rev.timezone
 
269
    if 'author-timezone' in rev.properties:
 
270
        commit._author_timezone = int(rev.properties['author-timezone'])
 
271
    else:
 
272
        commit._author_timezone = commit._commit_timezone
195
273
    commit.serialize()
196
274
    return commit