/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

  • Committer: Jelmer Vernooij
  • Author(s): Roland Mas
  • Date: 2009-05-13 20:12:06 UTC
  • mto: (0.312.1 master) (6883.23.1 bundle-git)
  • mto: This revision was merged to the branch mainline in revision 6960.
  • Revision ID: jelmer@samba.org-20090513201206-iduvuqr0mxhze7al
Fix missing import.

Show diffs side-by-side

Legend: added lines

Legend: removed lines

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    trace,
 
28
    urlutils,
 
29
    )
 
30
from bzrlib.inventory import (
 
31
    ROOT_ID,
 
32
    )
21
33
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
34
    ForeignVcs, 
 
35
    VcsMappingRegistry, 
 
36
    ForeignRevision,
 
37
    )
 
38
from bzrlib.xml_serializer import (
 
39
    escape_invalid_chars,
 
40
    )
 
41
 
 
42
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
 
43
 
26
44
 
27
45
def escape_file_id(file_id):
28
46
    return file_id.replace('_', '__').replace(' ', '_s')
29
47
 
30
48
 
31
49
def unescape_file_id(file_id):
32
 
    return file_id.replace("_s", " ").replace("__", "_")
 
50
    ret = []
 
51
    i = 0
 
52
    while i < len(file_id):
 
53
        if file_id[i] != '_':
 
54
            ret.append(file_id[i])
 
55
        else:
 
56
            if file_id[i+1] == '_':
 
57
                ret.append("_")
 
58
            elif file_id[i+1] == 's':
 
59
                ret.append(" ")
 
60
            else:
 
61
                raise AssertionError("unknown escape character %s" % file_id[i+1])
 
62
            i += 1
 
63
        i += 1
 
64
    return "".join(ret)
 
65
 
 
66
 
 
67
def fix_person_identifier(text):
 
68
    if "<" in text and ">" in text:
 
69
        return text
 
70
    return "%s <%s>" % (text, text)
 
71
 
 
72
 
 
73
def warn_escaped(commit, num_escaped):
 
74
    trace.warning("Escaped %d XML-invalid characters in %s. Will be unable "
 
75
                  "to regenerate the SHA map.", num_escaped, commit)
 
76
 
 
77
 
 
78
def warn_unusual_mode(commit, path, mode):
 
79
    trace.warning("Unusual file mode %o for %s in %s. Will be unable to "
 
80
                  "regenerate the SHA map.", mode, path, commit)
33
81
 
34
82
 
35
83
class BzrGitMapping(foreign.VcsMapping):
55
103
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
104
 
57
105
    def generate_file_id(self, path):
 
106
        # Git paths are just bytestrings
 
107
        # We must just hope they are valid UTF-8..
58
108
        if path == "":
59
109
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
110
        return escape_file_id(path)
 
111
 
 
112
    def parse_file_id(self, file_id):
 
113
        if file_id == ROOT_ID:
 
114
            return ""
 
115
        return unescape_file_id(file_id)
61
116
 
62
117
    def import_commit(self, commit):
63
118
        """Convert a git commit to a bzr revision.
68
123
            raise AssertionError("Commit object can't be None")
69
124
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
125
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
126
        rev.message, num_escaped = escape_invalid_chars(commit.message.decode("utf-8", "replace"))
 
127
        if num_escaped:
 
128
            warn_escaped(commit.id, num_escaped)
 
129
        rev.committer, num_escaped = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))
 
130
        if num_escaped:
 
131
            warn_escaped(commit.id, num_escaped)
73
132
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
133
            rev.properties['author'], num_escaped = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))
 
134
            if num_escaped:
 
135
                warn_escaped(commit.id, num_escaped)
 
136
 
 
137
        if commit.commit_time != commit.author_time:
 
138
            rev.properties['author-timestamp'] = str(commit.author_time)
 
139
        if commit.commit_timezone != commit.author_timezone:
 
140
            rev.properties['author-timezone'] = "%d" % (commit.author_timezone, )
75
141
        rev.timestamp = commit.commit_time
76
 
        rev.timezone = 0
 
142
        rev.timezone = commit.commit_timezone
77
143
        return rev
78
144
 
79
145
 
81
147
    revid_prefix = 'git-v1'
82
148
    experimental = False
83
149
 
 
150
    def __str__(self):
 
151
        return self.revid_prefix
 
152
 
84
153
 
85
154
class BzrGitMappingExperimental(BzrGitMappingv1):
86
155
    revid_prefix = 'git-experimental'
107
176
 
108
177
 
109
178
class ForeignGit(ForeignVcs):
110
 
    """Foreign Git."""
 
179
    """The Git Stupid Content Tracker"""
111
180
 
112
181
    def __init__(self):
113
182
        super(ForeignGit, self).__init__(mapping_registry)
121
190
default_mapping = BzrGitMappingv1()
122
191
 
123
192
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
126
 
    from bzrlib.inventory import InventoryDirectory, InventoryFile
 
193
def text_to_blob(texts, entry):
 
194
    from dulwich.objects import Blob
 
195
    text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
 
196
    blob = Blob()
 
197
    blob._text = text
 
198
    return blob
 
199
 
 
200
 
 
201
def symlink_to_blob(entry):
 
202
    from dulwich.objects import Blob
 
203
    blob = Blob()
 
204
    blob._text = entry.symlink_target
 
205
    return blob
 
206
 
 
207
 
 
208
def entry_mode(entry):
 
209
    if entry.kind == 'directory':
 
210
        return stat.S_IFDIR
 
211
    elif entry.kind == 'symlink':
 
212
        return stat.S_IFLNK
 
213
    elif entry.kind == 'file':
 
214
        mode = stat.S_IFREG | 0644
 
215
        if entry.executable:
 
216
            mode |= 0111
 
217
        return mode
 
218
    else:
 
219
        raise AssertionError
 
220
 
 
221
 
 
222
def directory_to_tree(entry, lookup_ie_sha1):
 
223
    from dulwich.objects import Tree
 
224
    tree = Tree()
 
225
    for name in sorted(entry.children.keys()):
 
226
        ie = entry.children[name]
 
227
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
 
228
    tree.serialize()
 
229
    return tree
 
230
 
 
231
 
 
232
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
233
    """Convert a Bazaar tree to a Git tree.
 
234
 
 
235
    :return: Yields tuples with object sha1, object and path
 
236
    """
 
237
    from dulwich.objects import Tree
127
238
    import stat
128
239
    stack = []
129
 
    cur = ""
 
240
    if cur is None:
 
241
        cur = ""
130
242
    tree = Tree()
131
243
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
244
    # stack contains the set of trees that we haven't 
135
245
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
138
 
        while stack and not path.startswith(cur):
 
246
    for path, entry in inventory.iter_entries():
 
247
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
 
248
            # We've hit a file that's not a child of the previous path
139
249
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
141
 
            yield sha, tree, cur
 
250
            sha = tree.id
 
251
            yield sha, tree, cur.encode("utf-8")
142
252
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
253
            cur, tree = stack.pop()
144
254
            tree.add(*t)
145
255
 
146
 
        if type(entry) == InventoryDirectory:
 
256
        if entry.kind == "directory":
147
257
            stack.append((cur, tree))
148
258
            cur = path
149
259
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
152
 
            #FIXME: We can make potentially make this Lazy to avoid shaing lots of stuff
153
 
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
157
 
            yield sha, blob, path
158
 
 
 
260
        else:
 
261
            if entry.kind == "file":
 
262
                blob = text_to_blob(texts, entry)
 
263
            elif entry.kind == "symlink":
 
264
                blob = symlink_to_blob(entry)
 
265
            else:
 
266
                raise AssertionError("Unknown kind %s" % entry.kind)
 
267
            sha = blob.id
 
268
            yield sha, blob, path.encode("utf-8")
159
269
            name = urlutils.basename(path).encode("utf-8")
160
 
            mode = stat.S_IFREG | 0644
161
 
            if entry.executable:
162
 
                mode |= 0111
163
 
            tree.add(mode, name, sha)
 
270
            tree.add(entry_mode(entry), name, sha)
164
271
 
165
272
    while len(stack) > 1:
166
273
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
168
 
        yield sha, tree, cur
 
274
        sha = tree.id
 
275
        yield sha, tree, cur.encode("utf-8")
169
276
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
277
        cur, tree = stack.pop()
171
278
        tree.add(*t)
172
279
 
173
280
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
281
    yield tree.id, tree, cur.encode("utf-8")
175
282
 
176
283
 
177
284
def revision_to_commit(rev, tree_sha, parent_lookup):
183
290
    """
184
291
    from dulwich.objects import Commit
185
292
    commit = Commit()
186
 
    commit._tree = tree_sha
 
293
    commit.tree = tree_sha
187
294
    for p in rev.parent_ids:
188
295
        git_p = parent_lookup(p)
189
296
        if git_p is not None:
190
 
            commit._parents.append(git_p)
191
 
    commit._message = rev.message.encode("utf-8")
192
 
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
194
 
    commit._commit_time = long(rev.timestamp)
195
 
    commit.serialize()
 
297
            assert len(git_p) == 40, "unexpected length for %r" % git_p
 
298
            commit.parents.append(git_p)
 
299
    commit.message = rev.message.encode("utf-8")
 
300
    commit.committer = fix_person_identifier(rev.committer.encode("utf-8"))
 
301
    commit.author = fix_person_identifier(rev.get_apparent_authors()[0].encode("utf-8"))
 
302
    commit.commit_time = long(rev.timestamp)
 
303
    if 'author-timestamp' in rev.properties:
 
304
        commit.author_time = long(rev.properties['author-timestamp'])
 
305
    else:
 
306
        commit.author_time = commit.commit_time
 
307
    commit.commit_timezone = rev.timezone
 
308
    if 'author-timezone' in rev.properties:
 
309
        commit.author_timezone = int(rev.properties['author-timezone'])
 
310
    else:
 
311
        commit.author_timezone = commit.commit_timezone 
196
312
    return commit