/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Update docs.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    urlutils,
 
28
    )
 
29
from bzrlib.inventory import (
 
30
    ROOT_ID,
 
31
    )
21
32
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
33
    ForeignVcs, 
 
34
    VcsMappingRegistry, 
 
35
    ForeignRevision,
 
36
    )
 
37
from bzrlib.xml_serializer import (
 
38
    escape_invalid_chars,
 
39
    )
 
40
 
 
41
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
 
42
 
26
43
 
27
44
def escape_file_id(file_id):
28
45
    return file_id.replace('_', '__').replace(' ', '_s')
29
46
 
30
47
 
31
48
def unescape_file_id(file_id):
32
 
    return file_id.replace("_s", " ").replace("__", "_")
 
49
    ret = []
 
50
    i = 0
 
51
    while i < len(file_id):
 
52
        if file_id[i] != '_':
 
53
            ret.append(file_id[i])
 
54
        else:
 
55
            if file_id[i+1] == '_':
 
56
                ret.append("_")
 
57
            elif file_id[i+1] == 's':
 
58
                ret.append(" ")
 
59
            else:
 
60
                raise AssertionError("unknown escape character %s" % file_id[i+1])
 
61
            i += 1
 
62
        i += 1
 
63
    return "".join(ret)
 
64
 
 
65
 
 
66
def fix_person_identifier(text):
 
67
    if "<" in text and ">" in text:
 
68
        return text
 
69
    return "%s <%s>" % (text, text)
33
70
 
34
71
 
35
72
class BzrGitMapping(foreign.VcsMapping):
55
92
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
93
 
57
94
    def generate_file_id(self, path):
 
95
        # Git paths are just bytestrings
 
96
        # We must just hope they are valid UTF-8..
58
97
        if path == "":
59
98
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
99
        return escape_file_id(path)
 
100
 
 
101
    def parse_file_id(self, file_id):
 
102
        if file_id == ROOT_ID:
 
103
            return ""
 
104
        return unescape_file_id(file_id)
61
105
 
62
106
    def import_commit(self, commit):
63
107
        """Convert a git commit to a bzr revision.
68
112
            raise AssertionError("Commit object can't be None")
69
113
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
114
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
115
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
116
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
117
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
118
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
119
 
 
120
        if commit.commit_time != commit.author_time:
 
121
            rev.properties['author-timestamp'] = str(commit.author_time)
 
122
        if commit.commit_timezone != commit.author_timezone:
 
123
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
75
124
        rev.timestamp = commit.commit_time
76
 
        rev.timezone = 0
 
125
        rev.timezone = commit.commit_timezone
77
126
        return rev
78
127
 
79
128
 
81
130
    revid_prefix = 'git-v1'
82
131
    experimental = False
83
132
 
 
133
    def __str__(self):
 
134
        return self.revid_prefix
 
135
 
84
136
 
85
137
class BzrGitMappingExperimental(BzrGitMappingv1):
86
138
    revid_prefix = 'git-experimental'
107
159
 
108
160
 
109
161
class ForeignGit(ForeignVcs):
110
 
    """Foreign Git."""
 
162
    """The Git Stupid Content Tracker"""
111
163
 
112
164
    def __init__(self):
113
165
        super(ForeignGit, self).__init__(mapping_registry)
121
173
default_mapping = BzrGitMappingv1()
122
174
 
123
175
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
126
 
    from bzrlib.inventory import InventoryDirectory, InventoryFile
 
176
def text_to_blob(texts, entry):
 
177
    from dulwich.objects import Blob
 
178
    text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
 
179
    blob = Blob()
 
180
    blob._text = text
 
181
    return blob
 
182
 
 
183
 
 
184
def symlink_to_blob(entry):
 
185
    from dulwich.objects import Blob
 
186
    blob = Blob()
 
187
    blob._text = entry.symlink_target
 
188
    return blob
 
189
 
 
190
 
 
191
def entry_mode(entry):
 
192
    if entry.kind == 'directory':
 
193
        return stat.S_IFDIR
 
194
    elif entry.kind == 'symlink':
 
195
        return stat.S_IFLNK
 
196
    elif entry.kind == 'file':
 
197
        mode = stat.S_IFREG | 0644
 
198
        if entry.executable:
 
199
            mode |= 0111
 
200
        return mode
 
201
    else:
 
202
        raise AssertionError
 
203
 
 
204
 
 
205
def directory_to_tree(entry, lookup_ie_sha1):
 
206
    from dulwich.objects import Tree
 
207
    tree = Tree()
 
208
    for name in sorted(entry.children.keys()):
 
209
        ie = entry.children[name]
 
210
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
 
211
    tree.serialize()
 
212
    return tree
 
213
 
 
214
 
 
215
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
216
    """Convert a Bazaar tree to a Git tree.
 
217
 
 
218
    :return: Yields tuples with object sha1, object and path
 
219
    """
 
220
    from dulwich.objects import Tree
127
221
    import stat
128
222
    stack = []
129
 
    cur = ""
 
223
    if cur is None:
 
224
        cur = ""
130
225
    tree = Tree()
131
226
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
227
    # stack contains the set of trees that we haven't 
135
228
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
138
 
        while stack and not path.startswith(cur):
 
229
    for path, entry in inventory.iter_entries():
 
230
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
 
231
            # We've hit a file that's not a child of the previous path
139
232
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
141
 
            yield sha, tree, cur
 
233
            sha = tree.id
 
234
            yield sha, tree, cur.encode("utf-8")
142
235
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
236
            cur, tree = stack.pop()
144
237
            tree.add(*t)
145
238
 
146
 
        if type(entry) == InventoryDirectory:
 
239
        if entry.kind == "directory":
147
240
            stack.append((cur, tree))
148
241
            cur = path
149
242
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
152
 
            #FIXME: We can potentially make this lazy to avoid SHA-ing lots of stuff
153
 
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
157
 
            yield sha, blob, path
158
 
 
 
243
        else:
 
244
            if entry.kind == "file":
 
245
                blob = text_to_blob(texts, entry)
 
246
            elif entry.kind == "symlink":
 
247
                blob = symlink_to_blob(entry)
 
248
            else:
 
249
                raise AssertionError("Unknown kind %s" % entry.kind)
 
250
            sha = blob.id
 
251
            yield sha, blob, path.encode("utf-8")
159
252
            name = urlutils.basename(path).encode("utf-8")
160
 
            mode = stat.S_IFREG | 0644
161
 
            if entry.executable:
162
 
                mode |= 0111
163
 
            tree.add(mode, name, sha)
 
253
            tree.add(entry_mode(entry), name, sha)
164
254
 
165
255
    while len(stack) > 1:
166
256
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
168
 
        yield sha, tree, cur
 
257
        sha = tree.id
 
258
        yield sha, tree, cur.encode("utf-8")
169
259
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
260
        cur, tree = stack.pop()
171
261
        tree.add(*t)
172
262
 
173
263
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
264
    yield tree.id, tree, cur.encode("utf-8")
175
265
 
176
266
 
177
267
def revision_to_commit(rev, tree_sha, parent_lookup):
183
273
    """
184
274
    from dulwich.objects import Commit
185
275
    commit = Commit()
186
 
    commit._tree = tree_sha
 
276
    commit.tree = tree_sha
187
277
    for p in rev.parent_ids:
188
278
        git_p = parent_lookup(p)
189
279
        if git_p is not None:
190
 
            commit._parents.append(git_p)
191
 
    commit._message = rev.message.encode("utf-8")
192
 
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
194
 
    commit._commit_time = long(rev.timestamp)
195
 
    commit.serialize()
 
280
            assert len(git_p) == 40, "unexpected length for %r" % git_p
 
281
            commit.parents.append(git_p)
 
282
    commit.message = rev.message.encode("utf-8")
 
283
    commit.committer = fix_person_identifier(rev.committer.encode("utf-8"))
 
284
    commit.author = fix_person_identifier(rev.get_apparent_authors()[0].encode("utf-8"))
 
285
    commit.commit_time = long(rev.timestamp)
 
286
    if 'author-timestamp' in rev.properties:
 
287
        commit.author_time = long(rev.properties['author-timestamp'])
 
288
    else:
 
289
        commit.author_time = commit.commit_time
 
290
    commit.commit_timezone = rev.timezone
 
291
    if 'author-timezone' in rev.properties:
 
292
        commit.author_timezone = int(rev.properties['author-timezone'])
 
293
    else:
 
294
        commit.author_timezone = commit.commit_timezone 
196
295
    return commit