/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to mapping.py

Store object hex sha's in InventoryEntry.text_id during fetch. 

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2008 Canonical Ltd
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
 
3
# Copyright (C) 2008 John Carr
2
4
#
3
5
# This program is free software; you can redistribute it and/or modify
4
6
# it under the terms of the GNU General Public License as published by
16
18
 
17
19
"""Converters, etc for going between Bazaar and Git ids."""
18
20
 
19
 
from bzrlib import errors, foreign, urlutils
20
 
from bzrlib.inventory import ROOT_ID
 
21
import stat
 
22
 
 
23
from bzrlib import (
 
24
    errors,
 
25
    foreign,
 
26
    osutils,
 
27
    urlutils,
 
28
    )
 
29
from bzrlib.inventory import (
 
30
    ROOT_ID,
 
31
    )
21
32
from bzrlib.foreign import (
22
 
        ForeignVcs, 
23
 
        VcsMappingRegistry, 
24
 
        ForeignRevision,
25
 
        )
 
33
    ForeignVcs, 
 
34
    VcsMappingRegistry, 
 
35
    ForeignRevision,
 
36
    )
 
37
from bzrlib.xml_serializer import (
 
38
    escape_invalid_chars,
 
39
    )
 
40
 
 
41
DEFAULT_FILE_MODE = stat.S_IFREG | 0644
 
42
 
26
43
 
27
44
def escape_file_id(file_id):
    """Escape underscores and spaces in a string.

    Every ``_`` is doubled to ``__`` and every space becomes ``_s``.  The
    underscore pass runs first, so the ``_`` introduced for a space is not
    doubled again.

    :param file_id: String to escape.
    :return: The escaped string.
    """
    return file_id.replace('_', '__').replace(' ', '_s')
55
72
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()
56
73
 
57
74
    def generate_file_id(self, path):
 
75
        # Git paths are just bytestrings
 
76
        # We must just hope they are valid UTF-8..
 
77
        assert isinstance(path, str)
58
78
        if path == "":
59
79
            return ROOT_ID
60
 
        return escape_file_id(path.encode('utf-8'))
 
80
        return escape_file_id(path)
 
81
 
 
82
    def parse_file_id(self, file_id):
 
83
        if file_id == ROOT_ID:
 
84
            return ""
 
85
        return unescape_file_id(file_id)
61
86
 
62
87
    def import_commit(self, commit):
63
88
        """Convert a git commit to a bzr revision.
68
93
            raise AssertionError("Commit object can't be None")
69
94
        rev = ForeignRevision(commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
70
95
        rev.parent_ids = tuple([self.revision_id_foreign_to_bzr(p) for p in commit.parents])
71
 
        rev.message = commit.message.decode("utf-8", "replace")
72
 
        rev.committer = str(commit.committer).decode("utf-8", "replace")
 
96
        rev.message = escape_invalid_chars(commit.message.decode("utf-8", "replace"))[0]
 
97
        rev.committer = escape_invalid_chars(str(commit.committer).decode("utf-8", "replace"))[0]
73
98
        if commit.committer != commit.author:
74
 
            rev.properties['author'] = str(commit.author).decode("utf-8", "replace")
 
99
            rev.properties['author'] = escape_invalid_chars(str(commit.author).decode("utf-8", "replace"))[0]
 
100
 
 
101
        if commit.commit_time != commit.author_time:
 
102
            rev.properties['author-timestamp'] = str(commit.author_time)
 
103
        if commit.commit_timezone != commit.author_timezone:
 
104
            rev.properties['author-timezone'] = "%f" % (commit.author_timezone * .6)
75
105
        rev.timestamp = commit.commit_time
76
 
        rev.timezone = 0
 
106
        rev.timezone = int(commit.commit_timezone * .6)
 
107
        if rev.timezone / .6 != commit.commit_timezone:
 
108
            rev.properties['commit-timezone'] = "%f" % (commit.commit_timezone * .6)
77
109
        return rev
78
110
 
79
111
 
121
153
default_mapping = BzrGitMappingv1()
122
154
 
123
155
 
124
 
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
125
 
    from dulwich.objects import Tree, Blob
126
 
    from bzrlib.inventory import InventoryDirectory, InventoryFile
 
156
def text_to_blob(texts, entry):
    """Build a Git blob holding the full text of a file inventory entry.

    :param texts: Object providing ``get_record_stream``; queried with the
        single key ``(entry.file_id, entry.revision)``.
    :param entry: Inventory entry supplying ``file_id`` and ``revision``.
    :return: A ``dulwich.objects.Blob`` whose text is the record's fulltext.
    :raises AssertionError: if the record stream yields nothing for the key.
    """
    from dulwich.objects import Blob
    key = (entry.file_id, entry.revision)
    stream = texts.get_record_stream([key], 'unordered', True)
    # Take the first record with a loop rather than ``stream.next()``: an
    # exhausted stream would otherwise raise StopIteration, which silently
    # ends any generator that calls this function.
    for record in stream:
        blob = Blob()
        blob._text = record.get_bytes_as('fulltext')
        return blob
    raise AssertionError("No text record for %r" % (key,))
 
162
 
 
163
 
 
164
def symlink_to_blob(entry):
    """Build a Git blob whose content is the target of a symlink entry.

    :param entry: Inventory entry supplying ``symlink_target``.
    :return: A ``dulwich.objects.Blob`` holding the link target.
    """
    from dulwich.objects import Blob
    blob = Blob()
    # NOTE(review): the target is stored exactly as given.  If
    # symlink_target can be a unicode string it presumably needs encoding
    # first (names elsewhere in this file are encoded as UTF-8) -- confirm.
    blob._text = entry.symlink_target
    return blob
 
169
 
 
170
 
 
171
def entry_mode(entry):
    """Return the Git tree mode for an inventory entry.

    :param entry: Object with a ``kind`` attribute and, for files, an
        ``executable`` attribute.
    :return: ``stat.S_IFDIR`` for directories, ``stat.S_IFLNK`` for
        symlinks, and for files ``stat.S_IFREG`` with permissions
        rw-r--r-- (plus the execute bits when ``entry.executable`` is true).
    :raises AssertionError: if ``entry.kind`` is none of the above.
    """
    if entry.kind == 'directory':
        return stat.S_IFDIR
    if entry.kind == 'symlink':
        return stat.S_IFLNK
    if entry.kind == 'file':
        # rw-r--r-- (octal 644), spelled with stat constants so the value
        # does not depend on a particular octal literal syntax.
        mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IROTH)
        if entry.executable:
            # Execute for user, group and other (octal 111).
            mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        return mode
    raise AssertionError("Unknown kind %s" % entry.kind)
 
183
 
 
184
 
 
185
def directory_to_tree(entry, lookup_ie_sha1):
    """Build a Git tree object from the children of a directory entry.

    :param entry: Directory inventory entry; ``entry.children`` maps child
        names to child inventory entries.
    :param lookup_ie_sha1: Callable taking a child inventory entry and
        returning the value to record for it in the tree.
    :return: A serialized ``dulwich.objects.Tree`` with one item per child,
        added in sorted name order.
    """
    from dulwich.objects import Tree
    tree = Tree()
    for name in sorted(entry.children):
        ie = entry.children[name]
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
    tree.serialize()
    return tree
 
193
 
 
194
 
 
195
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
 
196
    """Convert a Bazaar tree to a Git tree.
 
197
 
 
198
    :return: Yields tuples with object sha1, object and path
 
199
    """
 
200
    from dulwich.objects import Tree
127
201
    import stat
128
202
    stack = []
129
 
    cur = ""
 
203
    if cur is None:
 
204
        cur = ""
130
205
    tree = Tree()
131
206
 
132
 
    inv = repo.get_inventory(revision_id)
133
 
 
134
207
    # stack contains the set of trees that we haven't 
135
208
    # finished constructing
136
 
 
137
 
    for path, entry in inv.iter_entries():
138
 
        while stack and not path.startswith(cur):
 
209
    for path, entry in inventory.iter_entries():
 
210
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
 
211
            # We've hit a file that's not a child of the previous path
139
212
            tree.serialize()
140
 
            sha = tree.sha().hexdigest()
141
 
            yield sha, tree, cur
 
213
            sha = tree.id
 
214
            yield sha, tree, cur.encode("utf-8")
142
215
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
143
216
            cur, tree = stack.pop()
144
217
            tree.add(*t)
145
218
 
146
 
        if type(entry) == InventoryDirectory:
 
219
        if entry.kind == "directory":
147
220
            stack.append((cur, tree))
148
221
            cur = path
149
222
            tree = Tree()
150
 
 
151
 
        if type(entry) == InventoryFile:
152
 
            #FIXME: We can make potentially make this Lazy to avoid shaing lots of stuff
153
 
            # and having all these objects in memory at once
154
 
            blob = Blob()
155
 
            _, blob._text = repo.iter_files_bytes([(entry.file_id, entry.revision, path)]).next()
156
 
            sha = blob.sha().hexdigest()
157
 
            yield sha, blob, path
158
 
 
 
223
        else:
 
224
            if entry.kind == "file":
 
225
                blob = text_to_blob(texts, entry)
 
226
            elif entry.kind == "symlink":
 
227
                blob = symlink_to_blob(entry)
 
228
            else:
 
229
                raise AssertionError("Unknown kind %s" % entry.kind)
 
230
            sha = blob.id
 
231
            yield sha, blob, path.encode("utf-8")
159
232
            name = urlutils.basename(path).encode("utf-8")
160
 
            mode = stat.S_IFREG | 0644
161
 
            if entry.executable:
162
 
                mode |= 0111
163
 
            tree.add(mode, name, sha)
 
233
            tree.add(entry_mode(entry), name, sha)
164
234
 
165
235
    while len(stack) > 1:
166
236
        tree.serialize()
167
 
        sha = tree.sha().hexdigest()
168
 
        yield sha, tree, cur
 
237
        sha = tree.id
 
238
        yield sha, tree, cur.encode("utf-8")
169
239
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
170
240
        cur, tree = stack.pop()
171
241
        tree.add(*t)
172
242
 
173
243
    tree.serialize()
174
 
    yield tree.sha().hexdigest(), tree, cur
 
244
    yield tree.id, tree, cur.encode("utf-8")
175
245
 
176
246
 
177
247
def revision_to_commit(rev, tree_sha, parent_lookup):
187
257
    for p in rev.parent_ids:
188
258
        git_p = parent_lookup(p)
189
259
        if git_p is not None:
 
260
            assert len(git_p) == 40, "unexpected length for %r" % git_p
190
261
            commit._parents.append(git_p)
191
262
    commit._message = rev.message.encode("utf-8")
192
263
    commit._committer = rev.committer.encode("utf-8")
193
 
    commit._author = rev.get_apparent_author().encode("utf-8")
 
264
    commit._author = rev.get_apparent_authors()[0].encode("utf-8")
194
265
    commit._commit_time = long(rev.timestamp)
 
266
    if 'author-timestamp' in rev.properties:
 
267
        commit._author_time = long(rev.properties['author-timestamp'])
 
268
    else:
 
269
        commit._author_time = commit._commit_time
 
270
    if 'committer-timezone' in rev.properties:
 
271
        commit._commit_timezone = int(float(rev.properties['commit-timezone']) / .6)
 
272
    else:
 
273
        commit._commit_timezone = int(rev.timezone / .6) 
 
274
    if 'author-timezone' in rev.properties:
 
275
        commit._author_timezone = int(float(rev.properties['author-timezone']) / .6)
 
276
    else:
 
277
        commit._author_timezone = commit._commit_timezone 
195
278
    commit.serialize()
196
279
    return commit