/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Cope with removed files.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    urlutils,
 
28
    )
 
29
from bzrlib.inventory import (
 
30
    ROOT_ID,
 
31
    )
21
32
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
33
    ForeignVcs, 
 
34
    VcsMappingRegistry, 
 
35
    ForeignRevision,
 
36
    )
 
37
from bzrlib.xml_serializer import (
 
38
    escape_invalid_chars,
 
39
    )
 
40
 
 
41
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
 
42
 
26
43
 
27
44
def escape_file_id(file_id):
28
45
    return file_id.replace('_', '__').replace(' ', '_s')
32
49
    return file_id.replace("_s", " ").replace("__", "_")
33
50
 
34
51
 
 
52
def fix_person_identifier(text):
 
53
    if "<" in text and ">" in text:
 
54
        return text
 
55
    return "%s <%s>" % (text, text)
 
56
 
 
57
 
35
58
class BzrGitMapping(foreign.VcsMapping):
36
59
    """Class that maps between Git and Bazaar semantics."""
37
60
    experimental = False
55
78
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
79
 
57
80
    def generate_file_id(self, path):
 
81
        # Git paths are just bytestrings
 
82
        # We must just hope they are valid UTF-8.
 
83
        assert isinstance(path, str)
58
84
        if path == "":
59
85
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
86
        return escape_file_id(path)
 
87
 
 
88
    def parse_file_id(self, file_id):
 
89
        if file_id == ROOT_ID:
 
90
            return ""
 
91
        return unescape_file_id(file_id)
61
92
 
62
93
    def import_commit(self, commit):
63
94
        """Convert a git commit to a bzr revision.
68
99
            raise AssertionError("Commit object can't be None")
69
100
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
101
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
102
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
103
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
104
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
105
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
106
 
 
107
        if commit.commit_time != commit.author_time:
 
108
            rev.properties['author-timestamp'] = str(commit.author_time)
 
109
        if commit.commit_timezone != commit.author_timezone:
 
110
            rev.properties['author-timezone'] = "%f" % (commit.author_timezone * .6)
75
111
        rev.timestamp = commit.commit_time
76
 
        rev.timezone = 0
 
112
        rev.timezone = int(commit.commit_timezone * .6)
 
113
        if rev.timezone / .6 != commit.commit_timezone:
 
114
            rev.properties['commit-timezone'] = "%f" % (commit.commit_timezone * .6)
77
115
        return rev
78
116
 
79
117
 
121
159
default_mapping = BzrGitMappingv1()
122
160
 
123
161
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
126
 
    from bzrlib.inventory import InventoryDirectory, InventoryFile
 
162
def text_to_blob(texts, entry):
 
163
    from dulwich.objects import Blob
 
164
    text = texts.get_record_stream([(entry.file_id, entry.revision)], 'unordered', True).next().get_bytes_as('fulltext')
 
165
    blob = Blob()
 
166
    blob._text = text
 
167
    return blob
 
168
 
 
169
 
 
170
def symlink_to_blob(entry):
 
171
    from dulwich.objects import Blob
 
172
    blob = Blob()
 
173
    blob._text = entry.symlink_target
 
174
    return blob
 
175
 
 
176
 
 
177
def entry_mode(entry):
 
178
    if entry.kind == 'directory':
 
179
        return stat.S_IFDIR
 
180
    elif entry.kind == 'symlink':
 
181
        return stat.S_IFLNK
 
182
    elif entry.kind == 'file':
 
183
        mode = stat.S_IFREG | 0644
 
184
        if entry.executable:
 
185
            mode |= 0111
 
186
        return mode
 
187
    else:
 
188
        raise AssertionError
 
189
 
 
190
 
 
191
def directory_to_tree(entry, lookup_ie_sha1):
 
192
    from dulwich.objects import Tree
 
193
    tree = Tree()
 
194
    for name in sorted(entry.children.keys()):
 
195
        ie = entry.children[name]
 
196
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
 
197
    tree.serialize()
 
198
    return tree
 
199
 
 
200
 
 
201
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
202
    """Convert a Bazaar tree to a Git tree.
 
203
 
 
204
    :return: Yields tuples with object sha1, object and path
 
205
    """
 
206
    from dulwich.objects import Tree
127
207
    import stat
128
208
    stack = []
129
 
    cur = ""
 
209
    if cur is None:
 
210
        cur = ""
130
211
    tree = Tree()
131
212
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
213
    # stack contains the set of trees that we haven't 
135
214
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
138
 
        while stack and not path.startswith(cur):
 
215
    for path, entry in inventory.iter_entries():
 
216
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
 
217
            # We've hit a file that's not a child of the previous path
139
218
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
141
 
            yield sha, tree, cur
 
219
            sha = tree.id
 
220
            yield sha, tree, cur.encode("utf-8")
142
221
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
222
            cur, tree = stack.pop()
144
223
            tree.add(*t)
145
224
 
146
 
        if type(entry) == InventoryDirectory:
 
225
        if entry.kind == "directory":
147
226
            stack.append((cur, tree))
148
227
            cur = path
149
228
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
152
 
            #FIXME: We could potentially make this lazy to avoid SHA-ing lots of stuff
153
 
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
157
 
            yield sha, blob, path
158
 
 
 
229
        else:
 
230
            if entry.kind == "file":
 
231
                blob = text_to_blob(texts, entry)
 
232
            elif entry.kind == "symlink":
 
233
                blob = symlink_to_blob(entry)
 
234
            else:
 
235
                raise AssertionError("Unknown kind %s" % entry.kind)
 
236
            sha = blob.id
 
237
            yield sha, blob, path.encode("utf-8")
159
238
            name = urlutils.basename(path).encode("utf-8")
160
 
            mode = stat.S_IFREG | 0644
161
 
            if entry.executable:
162
 
                mode |= 0111
163
 
            tree.add(mode, name, sha)
 
239
            tree.add(entry_mode(entry), name, sha)
164
240
 
165
241
    while len(stack) > 1:
166
242
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
168
 
        yield sha, tree, cur
 
243
        sha = tree.id
 
244
        yield sha, tree, cur.encode("utf-8")
169
245
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
246
        cur, tree = stack.pop()
171
247
        tree.add(*t)
172
248
 
173
249
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
250
    yield tree.id, tree, cur.encode("utf-8")
175
251
 
176
252
 
177
253
def revision_to_commit(rev, tree_sha, parent_lookup):
187
263
    for p in rev.parent_ids:
188
264
        git_p = parent_lookup(p)
189
265
        if git_p is not None:
 
266
            assert len(git_p) == 40, "unexpected length for %r" % git_p
190
267
            commit._parents.append(git_p)
191
268
    commit._message = rev.message.encode("utf-8")
192
 
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
 
269
    commit._committer = fix_person_identifier(rev.committer.encode("utf-8"))
 
270
    commit._author = fix_person_identifier(rev.get_apparent_authors()[0].encode("utf-8"))
194
271
    commit._commit_time = long(rev.timestamp)
 
272
    if 'author-timestamp' in rev.properties:
 
273
        commit._author_time = long(rev.properties['author-timestamp'])
 
274
    else:
 
275
        commit._author_time = commit._commit_time
 
276
    if 'committer-timezone' in rev.properties:
 
277
        commit._commit_timezone = int(float(rev.properties['commit-timezone']) / .6)
 
278
    else:
 
279
        commit._commit_timezone = int(rev.timezone / .6) 
 
280
    if 'author-timezone' in rev.properties:
 
281
        commit._author_timezone = int(float(rev.properties['author-timezone']) / .6)
 
282
    else:
 
283
        commit._author_timezone = commit._commit_timezone 
195
284
    commit.serialize()
196
285
    return commit