/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
44
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
45
# is not updated (because the parent of commit is already merged, so we don't
46
# set new_git_branch to the previously used name)
47
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
48
from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
49
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
50
51
import bzrlib.branch
52
import bzrlib.revision
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
53
from bzrlib import (
54
    builtins,
55
    errors as bazErrors,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
56
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
57
    progress,
58
    trace,
59
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
60
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
61
from bzrlib.plugins.fastimport import (
62
    helpers,
63
    marks_file,
64
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
65
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
66
from fastimport import commands
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
67
from fastimport.helpers import (
68
    binary_stream,
69
    single_plural,
70
    )
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
71
72
73
def _get_output_stream(destination):
74
    if destination is None or destination == '-':
75
        return binary_stream(sys.stdout)
76
    elif destination.endswith('gz'):
77
        import gzip
78
        return gzip.open(destination, 'wb')
79
    else:
80
        return open(destination, 'wb')
81
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
82
# from dulwich.repo:
83
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    # An empty refname is rejected here, so refname[-1] below is safe.
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 0o40 is the space character: everything below it is a control
        # character. The 0o prefix is accepted by Python 2.6+ and Python 3,
        # whereas the legacy "040" spelling is a syntax error on Python 3.
        if ord(c) < 0o40 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
113
0.133.3 by Oleksandr Usov
Implement comments from patch review:
114
115
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format would reject (apart from the
    "must contain a slash" rule, which is not handled here) is replaced
    by a single underscore.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are
    unique.

    :param refname: refname to rewrite
    :return: new refname
    """
    new_refname = re.sub(
        # '/.' in refname or startswith '.'
        r"/\.|^\."
        # '..' in refname
        r"|\.\."
        # ord(c) < 0o40, i.e. any control character
        r"|[\000-\037]"
        # c in '\177 ~^:?*['
        r"|[\177 ~^:?*\[]"
        # last char in "/."
        r"|[/.]$"
        # endswith '.lock'; the dot must be escaped, otherwise names such
        # as "clock" or "unblock" would be rewritten as well
        r"|\.lock$"
        # "@{" in refname
        r"|@\{"
        # "\\" in refname
        r"|\\",
        "_", refname)
    return new_refname
0.64.173 by Ian Clatworthy
add -r option to fast-export
145
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
146
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
147
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
148
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
149
    def __init__(self, source, outf, git_branch=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False,
        baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export (kept as self.branch)
        :param outf: stream that the generated commands are written to
        :param git_branch: name placed after 'refs/heads/' in emitted refs
        :param checkpoint: when greater than zero, marks are saved and a
            checkpoint command is emitted every that many commits
        :param import_marks_file: if set, marks are loaded from it via
            marks_file.import_marks
        :param export_marks_file: if set, marks are written to it via
            marks_file.export_marks
        :param revision: optional revision range restricting the export
        :param verbose: if True progress is reported every 100 commits
            rather than every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param baseline: if True and a start revision is given, that
            revision is emitted first as a baseline commit
        """
        # What to export and where to send it
        self.branch = source
        self.outf = outf
        self.git_branch = git_branch
        self.revision = revision
        self.excluded_revisions = set()

        # Output flavour
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.baseline = baseline
        self.checkpoint = checkpoint
        self.properties_to_exclude = ['authors', 'author']

        # Marks files
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file

        # Feature detection: not every Revision class has the
        # multiple-author API
        self._multi_author_api_available = hasattr(
            bzrlib.revision.Revision, 'get_apparent_authors')

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        self.branch_names = {}
        if not self.import_marks_file:
            return
        marks_info = marks_file.import_marks(self.import_marks_file)
        if marks_info is None:
            return
        # The marks file maps mark -> revision id; we need the reverse.
        # (Branch names are no longer included in the marks file.)
        by_revid = {}
        for mark, revid in marks_info.items():
            by_revid[revid] = mark
        self.revid_to_mark = by_revid
0.64.173 by Ian Clatworthy
add -r option to fast-export
198
 
199
    def interesting_history(self):
        """Return the list of revision ids that should be exported.

        The ids come from branch.iter_merge_sorted_revisions (limited to
        the requested revision range, if any) in reversed order. When a
        start revision is given, self.excluded_revisions is filled with
        the revisions reported for that start point; with baseline set,
        the start revision itself is kept and placed first in the result.

        :return: a new list of revision ids
        """
        start_rev_id = end_rev_id = None
        if self.revision:
            rev1, rev2 = builtins._get_revision_range(self.revision,
                self.branch, "fast-export")
            start_rev_id, end_rev_id = rev1.rev_id, rev2.rev_id

        self.note("Calculating the revisions to include ...")
        view_revisions = []
        for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(
                end_rev_id, start_rev_id):
            view_revisions.append(rev_id)
        view_revisions.reverse()

        # If a starting point was given, we need to later check that we don't
        # start emitting revisions from before that point. Collect the
        # revisions to exclude now ...
        if start_rev_id is not None:
            self.note("Calculating the revisions to exclude ...")
            excluded = set()
            for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(
                    start_rev_id):
                excluded.add(rev_id)
            self.excluded_revisions = excluded
            if self.baseline:
                # needed so the first relative commit knows its parent
                self.excluded_revisions.remove(start_rev_id)
                view_revisions.insert(0, start_rev_id)
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
224
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
225
    def run(self):
        """Perform the export.

        With the repository read-locked: work out the revisions to export,
        emit the feature commands (unless in plain format), the optional
        baseline commit, one commit per revision and finally the tags when
        the branch supports them. Afterwards the marks are saved and the
        statistics are reported.
        """
        # Export the data
        self.branch.repository.lock_read()
        try:
            revids = self.interesting_history()
            self._commit_total = len(revids)
            self.note("Starting export of %d revisions ..." %
                self._commit_total)
            if not self.plain_format:
                self.emit_features()
            if self.baseline:
                # The first entry is emitted as a full-tree baseline commit
                baseline_revid = revids.pop(0)
                self.emit_baseline(baseline_revid, self.git_branch)
            for revid in revids:
                self.emit_commit(revid, self.git_branch)
            if self.branch.supports_tags():
                self.emit_tags()
        finally:
            self.branch.repository.unlock()

        # Save the marks if requested
        self._save_marks()
        self.dump_stats()
247
248
    def note(self, msg, *args):
        """Send msg to trace.note, prefixed with the time of day.

        :param msg: the message (may contain format specifiers for args)
        :param args: passed through to trace.note unchanged
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
252
253
    def warning(self, msg, *args):
        """Send msg to trace.warning, prefixed with the time and 'WARNING:'.

        :param msg: the message (may contain format specifiers for args)
        :param args: passed through to trace.warning unchanged
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
257
258
    def _time_of_day(self):
259
        """Time of day as a string."""
260
        # Note: this is a separate method so tests can patch in a fixed value
261
        return time.strftime("%H:%M:%S")
262
263
    def report_progress(self, commit_count, details=''):
264
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
265
            if self._commit_total:
266
                counts = "%d/%d" % (commit_count, self._commit_total)
267
            else:
268
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
269
            minutes = (time.time() - self._start_time) / 60
270
            rate = commit_count * 1.0 / minutes
271
            if rate > 10:
272
                rate_str = "at %.0f/minute " % rate
273
            else:
274
                rate_str = "at %.1f/minute " % rate
275
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
276
277
    def dump_stats(self):
        """Report how many revisions have a mark and how long it took."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
283
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
284
    def print_cmd(self, cmd):
285
        self.outf.write("%r\n" % cmd)
286
0.79.2 by Ian Clatworthy
extend & use marks_file API
287
    def _save_marks(self):
288
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
289
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
290
            marks_file.export_marks(self.export_marks_file, revision_ids)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
291
 
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
292
    def is_empty_dir(self, tree, path):
        """Tell whether path is a directory without contents in tree.

        :param tree: the tree to look in
        :param path: the path to examine
        :return: True only if path has a file id, is of kind 'directory'
            and the first entry produced by tree.walkdirs for it lists no
            contents; False otherwise
        """
        path_id = tree.path2id(path)
        if path_id is None:
            self.warning("Skipping empty_dir detection - no file_id for %s" %
                (path,))
            return False

        # Anything that is not a directory cannot be an empty directory
        if tree.kind(path_id) != 'directory':
            return False

        # Use treewalk to find the contents of our directory
        first_entry = list(tree.walkdirs(prefix=path))[0]
        return len(first_entry[1]) == 0
309
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
310
    def emit_features(self):
        """Emit one feature command per known feature name, in sorted order."""
        feature_names = sorted(commands.FEATURE_NAMES)
        for feature_name in feature_names:
            feature_cmd = commands.FeatureCommand(feature_name)
            self.print_cmd(feature_cmd)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
313
0.135.1 by Andy Grimm
Add --baseline option
314
    def emit_baseline(self, revid, git_branch):
        """Emit revid as a commit holding its complete tree.

        The file commands are computed against NULL_REVISION, so the whole
        tree of revid is emitted. The commit always gets mark 1.

        :param revid: id of the revision to use as the baseline
        :param git_branch: branch name placed after 'refs/heads/'
        """
        # Emit a full source tree of the first commit's parent
        baseline_mark = 1
        git_ref = 'refs/heads/%s' % (git_branch,)
        revobj = self.branch.repository.get_revision(revid)
        self.revid_to_mark[revid] = baseline_mark
        file_cmds = self._get_filecommands(bzrlib.revision.NULL_REVISION,
            revid)
        commit_cmd = self._get_commit_command(git_ref, baseline_mark, revobj,
            file_cmds)
        self.print_cmd(commit_cmd)
323
0.68.1 by Pieter de Bie
Classify bzr-fast-export
324
    def emit_commit(self, revid, git_branch):
0.64.173 by Ian Clatworthy
add -r option to fast-export
325
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
326
            return
327
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
328
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
329
        try:
330
            revobj = self.branch.repository.get_revision(revid)
331
        except bazErrors.NoSuchRevision:
332
            # This is a ghost revision. Mark it as not found and next!
333
            self.revid_to_mark[revid] = -1
334
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
335
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
336
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
337
        # TODO: Consider the excluded revisions when deciding the parents.
338
        # Currently, a commit with parents that are excluded ought to be
339
        # triggering the git_branch calculation below (and it is not).
340
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
341
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
342
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
343
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
344
            if ncommits:
345
                # This is a parentless commit but it's not the first one
346
                # output. We need to create a new temporary branch for it
347
                # otherwise git-fast-import will assume the previous commit
348
                # was this one's parent
349
                git_branch = self._next_tmp_branch_name()
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
350
            parent = bzrlib.revision.NULL_REVISION
351
        else:
352
            parent = revobj.parent_ids[0]
353
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
354
        # Print the commit
355
        git_ref = 'refs/heads/%s' % (git_branch,)
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
356
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
357
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
358
        file_cmds = self._get_filecommands(parent, revid)
359
        self.print_cmd(self._get_commit_command(git_ref, mark, revobj,
360
            file_cmds))
361
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
362
        # Report progress and checkpoint if it's time for that
363
        self.report_progress(ncommits)
364
        if (self.checkpoint > 0 and ncommits
365
            and ncommits % self.checkpoint == 0):
366
            self.note("Exported %i commits - adding checkpoint to output"
367
                % ncommits)
368
            self._save_marks()
369
            self.print_cmd(commands.CheckpointCommand())
370
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
371
    def _get_name_email(self, user):
372
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
373
            # If the email isn't inside <>, we need to use it as the name
374
            # in order for things to round-trip correctly.
375
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
376
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
377
            email = ''
378
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
379
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
380
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
381
382
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the CommitCommand describing revobj.

        :param git_ref: the ref the commit is made on
        :param mark: the mark assigned to this commit
        :param revobj: the Revision object being exported
        :param file_cmds: the file commands making up the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            more_authors = revobj.get_apparent_authors()
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            # The author is the committer: no separate author line needed
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        # NOTE(review): emit_commit records ghosts with mark -1; such
        # entries are not filtered out here - confirm whether they should be.
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            try:
                parent_mark = self.revid_to_mark[p]
            except KeyError:
                # ghost - ignore
                continue
            non_ghost_parents.append(":%s" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Work on a copy: deleting from revobj.properties directly
            # would strip the author metadata from the Revision object
            # itself, affecting any later use of it.
            properties = dict(revobj.properties)
            for prop in self.properties_to_exclude:
                properties.pop(prop, None)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds), more_authors=more_author_info,
            properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
444
445
    def _get_revision_trees(self, parent, revision_id):
        """Fetch the trees of parent and revision_id.

        If the parent's inventory is malformed, the tree of the parent's
        own first parent is used instead. If the inventory of revision_id
        is malformed, None takes the place of its tree.

        :param parent: id of the revision to diff against
        :param revision_id: id of the revision being exported
        :return: a (tree_old, tree_new) tuple; tree_new may be None
        """
        repository = self.branch.repository
        try:
            tree_old = repository.revision_tree(parent)
        except bazErrors.UnexpectedInventoryFormat:
            self.warning(
                "Parent is malformed - diffing against previous parent")
            # We can't find the old parent. Let's diff against his parent
            pp = repository.get_revision(parent)
            tree_old = repository.revision_tree(pp.parent_ids[0])

        try:
            tree_new = repository.revision_tree(revision_id)
        except bazErrors.UnexpectedInventoryFormat:
            # We can't really do anything anymore
            self.warning("Revision %s is malformed - skipping" % revision_id)
            tree_new = None
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
460
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
461
    def _get_filecommands(self, parent, revision_id):
        """Get the list of FileCommands for the changes between two revisions.

        :param parent: id of the revision to diff against
        :param revision_id: id of the revision being exported
        :return: a list of file commands; empty if either tree could not
            be obtained
        """
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
        if not tree_old or not tree_new:
            # Something is wrong with this revision - ignore the filecommands
            return []

        changes = tree_new.changes_from(tree_old)

        # Make "modified" have 3-tuples, as added does
        my_modified = [entry[0:3] for entry in changes.modified]

        # The potential interaction between renames and deletes is messy.
        # Handle it here ...
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
            changes.renamed, changes.removed, revision_id, tree_old)

        # Map kind changes to a modification using the new kind.
        # IGC: I don't understand why a delete would be needed first.
        # In fact, it seems harmful? If you add a FileDeleteCommand for the
        # path here, please file a bug explaining why you needed to.
        for path, id_, kind1, kind2 in changes.kind_changed:
            path = self._adjust_path_for_renames(path, renamed, revision_id)
            my_modified.append((path, id_, kind2))

        # Record modifications
        for path, id_, kind in changes.added + my_modified + rd_modifies:
            if kind == 'file':
                data = tree_new.get_file_text(id_)
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('file',
                    tree_new.is_executable(id_))
            elif kind == 'symlink':
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('symlink', False)
                data = tree_new.get_symlink_target(id_)
            elif kind == 'directory':
                if self.plain_format:
                    # Directories are not emitted in plain format
                    continue
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('directory', False)
                data = None
            else:
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
                    (path, kind))
                continue
            file_cmds.append(
                commands.FileModifyCommand(utf8_path, mode, None, data))
        return file_cmds
507
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
508
    def _process_renames_and_deletes(self, renames, deletes,
        revision_id, tree_old):
        """Build the rename and delete commands for one revision.

        :param renames: iterable of (oldpath, newpath, id_, kind,
            text_modified, meta_modified) tuples.
        :param deletes: iterable of (path, id_, kind) tuples.
        :param revision_id: only used in the diagnostic notes.
        :param tree_old: the tree the renames start from; queried for
            empty directories, entry kinds and directory children.
        :return: a (file_cmds, modifies, renamed) tuple where
            file_cmds is the ordered list of delete/rename commands,
            modifies is a list of (newpath, id_, kind) for renamed entries
            whose text or metadata also changed, and
            renamed is a list of [oldpath, newpath] pairs.
        """
        file_cmds = []
        modifies = []
        renamed = []

        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
        # In a nutshell, there are several nasty cases:
        #
        # 1) bzr rm a; bzr mv b a; bzr commit
        # 2) bzr mv x/y z; bzr rm x; bzr commit
        #
        # The first must come out with the delete first like this:
        #
        # D a
        # R b a
        #
        # The second case must come out with the rename first like this:
        #
        # R x/y z
        # D x
        #
        # So outputting all deletes first or all renames first won't work.
        # Instead, we need to make multiple passes over the various lists to
        # get the ordering right.

        # old child path -> new child path for children of renamed dirs
        must_be_renamed = {}
        # explicitly renamed paths, so children are not renamed twice
        old_to_new = {}
        # deletes not yet emitted; entries are removed as they are handled
        deleted_paths = set(p for p, _, _ in deletes)
        for (oldpath, newpath, id_, kind,
                text_modified, meta_modified) in renames:
            # Directories produce no commands at all in plain format.
            emit = kind != 'directory' or not self.plain_format
            if newpath in deleted_paths:
                # Case 1 above: the rename target is being deleted, so the
                # delete has to be output before the rename.
                if emit:
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
                deleted_paths.remove(newpath)
            if self.is_empty_dir(tree_old, oldpath):
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
                    revision_id))
                continue
            # NOTE(review): oldpath is deliberately used as-is; an earlier
            # call to self._adjust_path_for_renames(oldpath, renamed,
            # revision_id) at this point was disabled.
            renamed.append([oldpath, newpath])
            old_to_new[oldpath] = newpath
            if emit:
                file_cmds.append(
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
            if text_modified or meta_modified:
                modifies.append((newpath, id_, kind))

            # Renaming a directory implies all children must be renamed.
            # Note: changes_from() doesn't handle this
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
                    if e.kind == 'directory' and self.plain_format:
                        continue
                    old_child_path = osutils.pathjoin(oldpath, p)
                    new_child_path = osutils.pathjoin(newpath, p)
                    must_be_renamed[old_child_path] = new_child_path

        # Add children not already renamed
        if must_be_renamed:
            still_to_be_renamed = set(must_be_renamed) - set(old_to_new)
            # sorted() keeps the output order deterministic
            for old_child_path in sorted(still_to_be_renamed):
                new_child_path = must_be_renamed[old_child_path]
                if self.verbose:
                    self.note("implicitly renaming %s => %s" % (old_child_path,
                        new_child_path))
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
                    new_child_path.encode("utf-8")))

        # Record remaining deletes
        for path, id_, kind in deletes:
            if path not in deleted_paths:
                # Already emitted ahead of a rename onto this path.
                continue
            if kind == 'directory' and self.plain_format:
                continue
            # NOTE(review): path is deliberately used as-is; an earlier call
            # to self._adjust_path_for_renames(path, renamed, revision_id)
            # at this point was disabled.
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
        return file_cmds, modifies, renamed
589
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
590
    def _adjust_path_for_renames(self, path, renamed, revision_id):
        """Return path rewritten to account for earlier renames.

        :param path: the path to adjust.
        :param renamed: iterable of (old, new) pairs, applied in order, so
            a later pair sees the result of the earlier ones.
        :param revision_id: only used in the diagnostic notes.
        :return: the adjusted path (unchanged if no pair applies).
        """
        # If a previous rename is found, we should adjust the path
        for old, new in renamed:
            if path == old:
                self.note("Changing path %s given rename to %s in revision %s"
                    % (path, new, revision_id))
                path = new
            elif path.startswith(old + '/'):
                self.note(
                    "Adjusting path %s given rename of %s to %s in revision %s"
                    % (path, old, new, revision_id))
                # Swap only the leading component(s). str.replace() would
                # also rewrite any later occurrence of "old/" inside the
                # path (e.g. "lib/glib/f" must not become "src/gsrc/f").
                path = new + path[len(old):]
        return path
603
0.68.1 by Pieter de Bie
Classify bzr-fast-export
604
    def emit_tags(self):
        """Print a reset command for every tag of the branch.

        Tags whose revision has no entry in self.revid_to_mark are skipped
        with a warning. In plain format, a ref name rejected by
        check_ref_format() is either rewritten with
        sanitize_ref_name_for_git() (when self.rewrite_tags is set) or
        skipped, with a warning in both cases.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
            else:
                git_ref = 'refs/tags/%s' % tag.encode("utf-8")
                if self.plain_format and not check_ref_format(git_ref):
                    if self.rewrite_tags:
                        new_ref = sanitize_ref_name_for_git(git_ref)
                        self.warning('tag %r is exported as %r to be valid in git.',
                                     git_ref, new_ref)
                        git_ref = new_ref
                    else:
                        self.warning('not creating tag %r as its name would not be '
                                     'valid in git.', git_ref)
                        continue
                # mark may be an int, hence the explicit str()
                self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
624
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
625
    def _next_tmp_branch_name(self):
        """Return a unique branch name. The name will start with "tmp".

        The first call returns "tmp"; later calls return "tmp.1", "tmp.2",
        and so on. The counter is kept in self.branch_names["tmp"].
        """
        prefix = 'tmp'
        if prefix not in self.branch_names:
            # First request: the bare prefix is still free.
            self.branch_names[prefix] = 0
            return prefix
        self.branch_names[prefix] += 1
        return '%s.%d' % (prefix, self.branch_names[prefix])