/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
# Copyright (C) 2007 Canonical Ltd
# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
# Copyright (C) 2008 John Carr
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Converters, etc for going between Bazaar and Git ids."""

0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
21
import stat
22
0.200.292 by Jelmer Vernooij
Fix formatting.
23
from bzrlib import (
24
    errors,
25
    foreign,
0.200.356 by Jelmer Vernooij
Fix nasty bug in inventory_to_trees_and_blobs
26
    osutils,
0.200.490 by Jelmer Vernooij
Warn about unusual modes and escaped XML-invalid characters.
27
    trace,
0.200.292 by Jelmer Vernooij
Fix formatting.
28
    urlutils,
29
    )
30
from bzrlib.inventory import (
31
    ROOT_ID,
32
    )
0.200.152 by Jelmer Vernooij
Fix syntax errors.
33
from bzrlib.foreign import (
0.200.292 by Jelmer Vernooij
Fix formatting.
34
    ForeignVcs, 
35
    VcsMappingRegistry, 
36
    ForeignRevision,
37
    )
0.200.329 by Jelmer Vernooij
Fix imports.
38
from bzrlib.xml_serializer import (
39
    escape_invalid_chars,
0.200.309 by Jelmer Vernooij
Add XML escaping to work around serialization bug in bzr.
40
    )
41
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
42
# Mode of a regular, non-executable file (rw-r--r--).
DEFAULT_FILE_MODE = stat.S_IFREG | 0o644
0.200.345 by Jelmer Vernooij
Keep track of file modes to use.
43
0.206.1 by Jelmer Vernooij
Use foreign utility functions.
44
0.200.150 by Jelmer Vernooij
Abstract away file id generation.
45
def escape_file_id(file_id):
    """Escape a path so that it contains no spaces.

    Every underscore is doubled and every space is replaced by ``_s``.

    :param file_id: String to escape.
    :return: The escaped string.
    """
    # Underscores have to be doubled first; otherwise the "_s" produced for
    # spaces would itself be doubled by the underscore replacement.
    return file_id.replace('_', '__').replace(' ', '_s')
47
48
49
def unescape_file_id(file_id):
    """Decode a string in which ``__`` stands for ``_`` and ``_s`` for a space.

    :param file_id: Escaped string.
    :return: The unescaped string.
    :raises AssertionError: If an underscore is followed by anything other
        than ``_`` or ``s``, or if the string ends in the middle of an
        escape sequence.
    """
    ret = []
    i = 0
    length = len(file_id)
    while i < length:
        char = file_id[i]
        if char != '_':
            ret.append(char)
        else:
            # An underscore always introduces a two-character escape.
            if i + 1 >= length:
                # Previously this indexed past the end of the string and
                # surfaced as a bare IndexError.
                raise AssertionError(
                    "truncated escape sequence at end of file id %r"
                    % (file_id,))
            escaped = file_id[i+1]
            if escaped == '_':
                ret.append("_")
            elif escaped == 's':
                ret.append(" ")
            else:
                raise AssertionError(
                    "unknown escape character %s" % escaped)
            # Skip the second character of the escape sequence.
            i += 1
        i += 1
    return "".join(ret)
0.200.150 by Jelmer Vernooij
Abstract away file id generation.
65
66
0.200.376 by Jelmer Vernooij
Make sure author and committer names pushed to git contain < and >, otherwise the git parser barfs.
67
def fix_person_identifier(text):
    """Make sure a committer/author string contains both ``<`` and ``>``.

    :param text: Person identifier.
    :return: ``text`` unchanged if it already contains both ``<`` and
        ``>``; otherwise ``"text <text>"``.
    """
    if "<" in text and ">" in text:
        return text
    return "%s <%s>" % (text, text)
71
72
0.200.490 by Jelmer Vernooij
Warn about unusual modes and escaped XML-invalid characters.
73
def warn_escaped(commit, num_escaped):
    """Log a warning that XML-invalid characters were escaped.

    :param commit: Identifier of the commit, interpolated into the message.
    :param num_escaped: Number of characters that were escaped.
    """
    trace.warning("Escaped %d XML-invalid characters in %s. Will be unable "
                  "to regenerate the SHA map.", num_escaped, commit)
76
77
78
def warn_unusual_mode(commit, path, mode):
    """Log a warning about a file with an unusual mode.

    :param commit: Identifier of the commit, interpolated into the message.
    :param path: Path of the affected file.
    :param mode: Integer file mode; it is reported in octal.
    """
    trace.warning("Unusual file mode %o for %s in %s. Will be unable to "
                  "regenerate the SHA map.", mode, path, commit)
81
82
0.206.1 by Jelmer Vernooij
Use foreign utility functions.
83
class BzrGitMapping(foreign.VcsMapping):
    """Class that maps between Git and Bazaar semantics."""
    experimental = False

    def __init__(self):
        super(BzrGitMapping, self).__init__(foreign_git)

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.revid_prefix == other.revid_prefix)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly to
        # keep the two operators consistent.
        return not self.__eq__(other)

    @classmethod
    def revision_id_foreign_to_bzr(cls, git_rev_id):
        """Convert a git revision id handle to a Bazaar revision id."""
        return "%s:%s" % (cls.revid_prefix, git_rev_id)

    @classmethod
    def revision_id_bzr_to_foreign(cls, bzr_rev_id):
        """Convert a Bazaar revision id to a git revision id handle.

        :param bzr_rev_id: Bazaar revision id of the form
            ``<revid_prefix>:<git id>``.
        :return: Tuple of the git revision id and a new instance of this
            mapping class.
        :raises errors.InvalidRevisionId: If ``bzr_rev_id`` does not start
            with this mapping's prefix followed by a colon.
        """
        if not bzr_rev_id.startswith("%s:" % cls.revid_prefix):
            raise errors.InvalidRevisionId(bzr_rev_id, cls)
        return bzr_rev_id[len(cls.revid_prefix)+1:], cls()

    def generate_file_id(self, path):
        """Return the file id for ``path``; the empty path maps to ROOT_ID."""
        # Git paths are just bytestrings
        # We must just hope they are valid UTF-8..
        if path == "":
            return ROOT_ID
        return escape_file_id(path)

    def parse_file_id(self, file_id):
        """Return the path for ``file_id``; ROOT_ID maps to the empty path."""
        if file_id == ROOT_ID:
            return ""
        return unescape_file_id(file_id)

    def import_commit(self, commit):
        """Convert a git commit to a bzr revision.

        :param commit: Git commit object; must not be None.
        :return: a `bzrlib.revision.Revision` object.
        :raises AssertionError: If ``commit`` is None.
        """
        if commit is None:
            raise AssertionError("Commit object can't be None")
        rev = ForeignRevision(
            commit.id, self, self.revision_id_foreign_to_bzr(commit.id))
        rev.parent_ids = tuple(
            [self.revision_id_foreign_to_bzr(p) for p in commit.parents])
        # Text fields are decoded as UTF-8 (undecodable bytes are replaced)
        # and XML-invalid characters are escaped; a warning is logged
        # whenever anything had to be escaped.
        rev.message, num_escaped = escape_invalid_chars(
            commit.message.decode("utf-8", "replace"))
        if num_escaped:
            warn_escaped(commit.id, num_escaped)
        rev.committer, num_escaped = escape_invalid_chars(
            str(commit.committer).decode("utf-8", "replace"))
        if num_escaped:
            warn_escaped(commit.id, num_escaped)
        # Author details are only recorded as revision properties when they
        # differ from the committer's.
        if commit.committer != commit.author:
            rev.properties['author'], num_escaped = escape_invalid_chars(
                str(commit.author).decode("utf-8", "replace"))
            if num_escaped:
                warn_escaped(commit.id, num_escaped)

        if commit.commit_time != commit.author_time:
            rev.properties['author-timestamp'] = str(commit.author_time)
        if commit.commit_timezone != commit.author_timezone:
            rev.properties['author-timezone'] = "%d" % (
                commit.author_timezone, )
        rev.timestamp = commit.commit_time
        rev.timezone = commit.commit_timezone
        return rev
144
0.200.97 by Jelmer Vernooij
use mapping object.
145
0.200.190 by Jelmer Vernooij
Bless current mapping as v1.
146
class BzrGitMappingv1(BzrGitMapping):
    """Mapping whose revision ids carry the ``git-v1`` prefix."""
    revid_prefix = 'git-v1'
    experimental = False

    def __str__(self):
        return self.revid_prefix
152
0.200.190 by Jelmer Vernooij
Bless current mapping as v1.
153
154
class BzrGitMappingExperimental(BzrGitMappingv1):
    """Mapping whose revision ids carry the ``git-experimental`` prefix."""
    revid_prefix = 'git-experimental'
    experimental = True
0.200.97 by Jelmer Vernooij
use mapping object.
157
158
0.200.195 by Jelmer Vernooij
Return mapping in revision_id_bzr_to_foreign() as required by the interface.
159
class GitMappingRegistry(VcsMappingRegistry):
    """Registry of Git mappings, looked up by revision id prefix."""

    def revision_id_bzr_to_foreign(self, bzr_revid):
        """Parse a Bazaar revision id using the mapping named by its prefix.

        :param bzr_revid: Bazaar revision id of the form
            ``<mapping name>:<git id>``, where the mapping name starts with
            ``git-``.
        :return: Whatever the selected mapping's
            ``revision_id_bzr_to_foreign`` returns.
        :raises errors.InvalidRevisionId: If ``bzr_revid`` does not start
            with ``git-`` or contains no ``:`` separator.
        """
        if not bzr_revid.startswith("git-"):
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping_version, separator, _ = bzr_revid.partition(":")
        if not separator:
            # Without a colon the old tuple unpacking failed with a
            # ValueError instead of reporting an invalid revision id.
            raise errors.InvalidRevisionId(bzr_revid, None)
        mapping = self.get(mapping_version)
        return mapping.revision_id_bzr_to_foreign(bzr_revid)

    parse_revision_id = revision_id_bzr_to_foreign
169
170
171
# Mappings are registered lazily by module and class name, so the classes
# are only looked up when a mapping is requested.
mapping_registry = GitMappingRegistry()
mapping_registry.register_lazy('git-v1', "bzrlib.plugins.git.mapping",
                               "BzrGitMappingv1")
mapping_registry.register_lazy('git-experimental',
                               "bzrlib.plugins.git.mapping",
                               "BzrGitMappingExperimental")
176
177
178
class ForeignGit(ForeignVcs):
    """The Git Stupid Content Tracker"""

    def __init__(self):
        super(ForeignGit, self).__init__(mapping_registry)

    @classmethod
    def show_foreign_revid(cls, foreign_revid):
        """Return a one-entry dict labelling ``foreign_revid`` as a git commit."""
        return {"git commit": foreign_revid}
187
188
189
# Module-level singletons: the foreign VCS description handed to every
# mapping's constructor, and the mapping used by default.
foreign_git = ForeignGit()
default_mapping = BzrGitMappingv1()
0.200.212 by Jelmer Vernooij
Move conversion functions to mapping, use fetch_objects() from repository if present.
191
192
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
193
def text_to_blob(texts, entry):
    """Build a Git blob from the stored text of an inventory entry.

    :param texts: Object providing ``get_record_stream``.
    :param entry: Inventory entry; its ``file_id`` and ``revision`` form the
        key of the text that is fetched.
    :return: A `dulwich.objects.Blob` holding the full text.
    """
    from dulwich.objects import Blob
    stream = texts.get_record_stream(
        [(entry.file_id, entry.revision)], 'unordered', True)
    # Exactly one key was requested, so only the first record is needed.
    text = next(stream).get_bytes_as('fulltext')
    blob = Blob()
    blob._text = text
    return blob
199
200
0.200.354 by Jelmer Vernooij
Support symlinks in conversion to git.
201
def symlink_to_blob(entry):
    """Build a Git blob whose content is the target of a symlink entry.

    :param entry: Inventory entry with a ``symlink_target`` attribute.
    :return: A `dulwich.objects.Blob` holding the link target.
    """
    from dulwich.objects import Blob
    blob = Blob()
    # NOTE(review): the target is stored as-is, without any encoding step;
    # confirm that callers always supply a byte string here.
    blob._text = entry.symlink_target
    return blob
206
0.200.521 by Jelmer Vernooij
Abstract out kind mapping a bit, initial work on support tree-references.
207
def mode_is_executable(mode):
    """Return True if any execute bit (user, group or other) is set in mode."""
    return bool(mode & 0o111)
209
210
def mode_kind(mode):
    """Map a file mode to an entry kind.

    :param mode: Integer file mode.
    :return: One of ``'directory'``, ``'file'``, ``'symlink'`` or
        ``'tree-reference'``.
    :raises AssertionError: If the type bits of ``mode`` match none of the
        known kinds.
    """
    # Floor division keeps the result an integer even under true division.
    entry_kind = (mode & 0o700000) // 0o100000
    if entry_kind == 0:
        return 'directory'
    elif entry_kind == 1:
        # The next octal digit distinguishes the non-directory kinds.
        file_kind = (mode & 0o70000) // 0o10000
        if file_kind == 0:
            return 'file'
        elif file_kind == 2:
            return 'symlink'
        elif file_kind == 6:
            return 'tree-reference'
        else:
            raise AssertionError(
                "Unknown file kind %d, perms=%o." % (file_kind, mode,))
    else:
        raise AssertionError(
            "Unknown kind, perms=%r." % (mode,))
228
0.200.354 by Jelmer Vernooij
Support symlinks in conversion to git.
229
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
230
def entry_mode(entry):
    """Return the file mode to use for an inventory entry.

    :param entry: Object with a ``kind`` attribute and, for files, an
        ``executable`` attribute.
    :return: ``stat.S_IFDIR`` for directories, ``stat.S_IFLNK`` for
        symlinks, and a regular-file mode of 0644 (0755 when the entry is
        executable) for files.
    :raises AssertionError: If ``entry.kind`` is none of the above.
    """
    if entry.kind == 'directory':
        return stat.S_IFDIR
    elif entry.kind == 'symlink':
        return stat.S_IFLNK
    elif entry.kind == 'file':
        mode = stat.S_IFREG | 0o644
        if entry.executable:
            mode |= 0o111
        return mode
    else:
        raise AssertionError("Unknown kind %s" % entry.kind)
242
243
244
def directory_to_tree(entry, lookup_ie_sha1):
    """Build a Git tree object from the children of a directory entry.

    :param entry: Directory entry whose ``children`` maps names to child
        entries.
    :param lookup_ie_sha1: Callable taking a child entry and returning the
        sha to record for it in the tree.
    :return: The serialized `dulwich.objects.Tree`.
    """
    from dulwich.objects import Tree
    tree = Tree()
    # Children are added in sorted name order.
    for name in sorted(entry.children):
        ie = entry.children[name]
        tree.add(entry_mode(ie), name.encode("utf-8"), lookup_ie_sha1(ie))
    tree.serialize()
    return tree
252
253
0.200.354 by Jelmer Vernooij
Support symlinks in conversion to git.
254
def inventory_to_tree_and_blobs(inventory, texts, mapping, cur=None):
    """Convert a Bazaar tree to a Git tree.

    :param inventory: Inventory to convert; walked with ``iter_entries()``.
    :param texts: Source of file texts, passed to `text_to_blob`.
    :param mapping: Not used by this function.
    :param cur: Path treated as the current directory at the start of the
        walk; defaults to the empty path.
    :return: Yields tuples with object sha1, object and path
    :raises AssertionError: If an entry is neither a directory, a file nor
        a symlink.
    """
    from dulwich.objects import Tree
    stack = []
    if cur is None:
        cur = ""
    tree = Tree()

    # stack contains the set of trees that we haven't
    # finished constructing
    for path, entry in inventory.iter_entries():
        while stack and not path.startswith(osutils.pathjoin(cur, "")):
            # We've hit a file that's not a child of the previous path
            tree.serialize()
            sha = tree.id
            yield sha, tree, cur.encode("utf-8")
            # Record the finished tree in its parent, which becomes the
            # tree under construction again.
            t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
            cur, tree = stack.pop()
            tree.add(*t)

        if entry.kind == "directory":
            # Descend: park the tree built so far and start a new one.
            stack.append((cur, tree))
            cur = path
            tree = Tree()
        else:
            if entry.kind == "file":
                blob = text_to_blob(texts, entry)
            elif entry.kind == "symlink":
                blob = symlink_to_blob(entry)
            else:
                raise AssertionError("Unknown kind %s" % entry.kind)
            sha = blob.id
            yield sha, blob, path.encode("utf-8")
            name = urlutils.basename(path).encode("utf-8")
            tree.add(entry_mode(entry), name, sha)

    # Close the directories that are still open once the walk has ended.
    # NOTE(review): this stops with one element left on the stack --
    # presumably the initial empty tree pushed when the root directory
    # entry was visited; confirm against iter_entries().
    while len(stack) > 1:
        tree.serialize()
        sha = tree.id
        yield sha, tree, cur.encode("utf-8")
        t = (stat.S_IFDIR, urlutils.basename(cur).encode('UTF-8'), sha)
        cur, tree = stack.pop()
        tree.add(*t)

    tree.serialize()
    yield tree.id, tree, cur.encode("utf-8")
0.200.212 by Jelmer Vernooij
Move conversion functions to mapping, use fetch_objects() from repository if present.
304
305
306
def revision_to_commit(rev, tree_sha, parent_lookup):
    """Turn a Bazaar revision in to a Git commit

    :param rev: Bazaar revision to convert.
    :param tree_sha: Tree sha for the commit
    :param parent_lookup: Function for looking up the GIT sha equiv of a
        bzr revision; parents for which it returns None are left out.
    :return: dulwich.objects.Commit representing the revision
    :raises AssertionError: If ``parent_lookup`` returns a value that is
        not 40 characters long.
    """
    from dulwich.objects import Commit
    commit = Commit()
    commit.tree = tree_sha
    for p in rev.parent_ids:
        git_p = parent_lookup(p)
        if git_p is not None:
            # Raised explicitly rather than with ``assert`` so that the
            # check is not stripped under ``python -O``.
            if len(git_p) != 40:
                raise AssertionError("unexpected length for %r" % git_p)
            commit.parents.append(git_p)
    commit.message = rev.message.encode("utf-8")
    commit.committer = fix_person_identifier(rev.committer.encode("utf-8"))
    commit.author = fix_person_identifier(
        rev.get_apparent_authors()[0].encode("utf-8"))
    # int() promotes to long automatically on Python 2 when needed.
    commit.commit_time = int(rev.timestamp)
    # Author time and timezone fall back to the commit's values when the
    # revision carries no explicit author properties.
    if 'author-timestamp' in rev.properties:
        commit.author_time = int(rev.properties['author-timestamp'])
    else:
        commit.author_time = commit.commit_time
    commit.commit_timezone = rev.timezone
    if 'author-timezone' in rev.properties:
        commit.author_timezone = int(rev.properties['author-timezone'])
    else:
        commit.author_timezone = commit.commit_timezone
    return commit