/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
44
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
45
# is not updated (because the parent of commit is already merged, so we don't
46
# set new_git_branch to the previously used name)
47
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
48
from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
49
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
50
51
import bzrlib.branch
52
import bzrlib.revision
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
53
from bzrlib import (
54
    builtins,
55
    errors as bazErrors,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
56
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
57
    progress,
58
    trace,
59
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
60
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
61
from bzrlib.plugins.fastimport import (
62
    helpers,
63
    marks_file,
64
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
65
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
66
from fastimport import commands
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
67
from fastimport.helpers import (
68
    binary_stream,
69
    single_plural,
70
    )
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
71
72
73
def _get_output_stream(destination):
74
    if destination is None or destination == '-':
75
        return binary_stream(sys.stdout)
76
    elif destination.endswith('gz'):
77
        import gzip
78
        return gzip.open(destination, 'wb')
79
    else:
80
        return open(destination, 'wb')
81
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
82
# from dulwich.repo:
83
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 0x20 is octal 040 (space); written in hex because the bare
        # '040' spelling is a syntax error on Python 3.
        if ord(c) < 0x20 or c in '\177 ~^:?*[':
            return False
    # refname is known to be non-empty here: it contains a '/'.
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
113
0.133.3 by Oleksandr Usov
Implement comments from patch review:
114
115
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format rejects is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are
    unique.

    :param refname: refname to rewrite
    :return: new refname
    """
    return re.sub(
        # '/.' in refname or startswith '.'
        r"/\.|^\."
        # '..' in refname
        r"|\.\."
        # ord(c) < 040, i.e. the ASCII control characters
        r"|[\x00-\x1f]"
        # c in '\177 ~^:?*['
        r"|[\177 ~^:?*\[]"
        # last char in "/."
        r"|[/.]$"
        # endswith '.lock'; the dot is escaped so that names which merely
        # end in 'lock' (e.g. 'clock') are left alone
        r"|\.lock$"
        # "@{" in refname
        r"|@\{"
        # "\\" in refname
        r"|\\",
        "_", refname)
0.64.173 by Ian Clatworthy
add -r option to fast-export
145
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
146
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
147
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
148
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
149
    def __init__(self, source, outf, ref=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False,
        no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export; stored as self.branch
        :param outf: the stream the generated commands are written to
        :param ref: the ref name passed along with every emitted commit
        :param checkpoint: when greater than zero, a checkpoint command is
            emitted (and the marks saved) every that-many commits
        :param import_marks_file: if set, marks are loaded from this file
            before exporting
        :param export_marks_file: if set, marks are saved to this file
        :param revision: optional revision range restricting the export
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True, a full source tree of the first commit's
            parent is emitted before the requested revisions
        """
        # What to export and where to send it
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.revision = revision
        self.excluded_revisions = set()

        # Options controlling the shape of the output
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self.baseline = baseline
        self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
            'get_apparent_authors')
        # These revision properties are already exported as author data
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        # Branch names are no longer included in the marks file, so this
        # mapping always starts out empty.
        self.branch_names = {}
        if self.import_marks_file:
            marks_info = marks_file.import_marks(self.import_marks_file)
            if marks_info is not None:
                # The marks file maps mark -> revid; we need the inverse
                inverted = {}
                for mark, revid in marks_info.items():
                    inverted[revid] = mark
                self.revid_to_mark = inverted
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
200
0.64.173 by Ian Clatworthy
add -r option to fast-export
201
    def interesting_history(self):
202
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
203
            rev1, rev2 = builtins._get_revision_range(self.revision,
204
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
205
            start_rev_id = rev1.rev_id
206
            end_rev_id = rev2.rev_id
207
        else:
208
            start_rev_id = None
209
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
210
        self.note("Calculating the revisions to include ...")
0.64.341 by Jelmer Vernooij
Fix test, clarify help description for 'bzr fast-export'.
211
        view_revisions = [rev_id for rev_id, _, _, _ in
212
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
213
        view_revisions.reverse()
0.64.173 by Ian Clatworthy
add -r option to fast-export
214
        # If a starting point was given, we need to later check that we don't
215
        # start emitting revisions from before that point. Collect the
216
        # revisions to exclude now ...
217
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
218
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
219
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
220
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.135.2 by Andy Grimm
fix --baseline bugs, and add a couple of tests
221
            if self.baseline:
222
                # needed so the first relative commit knows its parent
223
                self.excluded_revisions.remove(start_rev_id)
224
                view_revisions.insert(0, start_rev_id)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
225
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
226
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
227
    def run(self):
228
        # Export the data
0.68.1 by Pieter de Bie
Classify bzr-fast-export
229
        self.branch.repository.lock_read()
230
        try:
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
231
            interesting = self.interesting_history()
0.102.15 by Ian Clatworthy
add revision count to 'Starting export ...' message
232
            self._commit_total = len(interesting)
233
            self.note("Starting export of %d revisions ..." %
234
                self._commit_total)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
235
            if not self.plain_format:
236
                self.emit_features()
0.135.1 by Andy Grimm
Add --baseline option
237
            if self.baseline:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
238
                self.emit_baseline(interesting.pop(0), self.ref)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
239
            for revid in interesting:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
240
                self.emit_commit(revid, self.ref)
0.138.1 by Oleksandr Usov
Add --no-tags flag
241
            if self.branch.supports_tags() and not self.no_tags:
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
242
                self.emit_tags()
0.68.1 by Pieter de Bie
Classify bzr-fast-export
243
        finally:
244
            self.branch.repository.unlock()
245
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
246
        # Save the marks if requested
0.79.2 by Ian Clatworthy
extend & use marks_file API
247
        self._save_marks()
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
248
        self.dump_stats()
249
250
    def note(self, msg, *args):
        """Send msg to trace.note, prefixed with the current time of day.

        :param msg: the message (may contain format specifiers for args)
        :param args: passed through unchanged to trace.note
        """
        trace.note("%s %s" % (self._time_of_day(), msg), *args)
254
255
    def warning(self, msg, *args):
        """Send msg to trace.warning, prefixed with the time of day.

        :param msg: the message (may contain format specifiers for args)
        :param args: passed through unchanged to trace.warning
        """
        trace.warning("%s WARNING: %s" % (self._time_of_day(), msg), *args)
259
260
    def _time_of_day(self):
261
        """Time of day as a string."""
262
        # Note: this is a separate method so tests can patch in a fixed value
263
        return time.strftime("%H:%M:%S")
264
265
    def report_progress(self, commit_count, details=''):
266
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
267
            if self._commit_total:
268
                counts = "%d/%d" % (commit_count, self._commit_total)
269
            else:
270
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
271
            minutes = (time.time() - self._start_time) / 60
272
            rate = commit_count * 1.0 / minutes
273
            if rate > 10:
274
                rate_str = "at %.0f/minute " % rate
275
            else:
276
                rate_str = "at %.1f/minute " % rate
277
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
278
279
    def dump_stats(self):
        """Note how many revisions have a mark and how long the run took."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
285
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
286
    def print_cmd(self, cmd):
287
        self.outf.write("%r\n" % cmd)
288
0.79.2 by Ian Clatworthy
extend & use marks_file API
289
    def _save_marks(self):
290
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
291
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
292
            marks_file.export_marks(self.export_marks_file, revision_ids)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
293
 
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
294
    def is_empty_dir(self, tree, path):
295
        path_id = tree.path2id(path)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
296
        if path_id is None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
297
            self.warning("Skipping empty_dir detection - no file_id for %s" %
298
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
299
            return False
300
301
        # Continue if path is not a directory
302
        if tree.kind(path_id) != 'directory':
303
            return False
304
305
        # Use treewalk to find the contents of our directory
306
        contents = list(tree.walkdirs(prefix=path))[0]
307
        if len(contents[1]) == 0:
308
            return True
309
        else:
310
            return False
311
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
312
    def emit_features(self):
        """Output a feature command for every known feature name, sorted."""
        names = list(commands.FEATURE_NAMES)
        names.sort()
        for feature in names:
            self.print_cmd(commands.FeatureCommand(feature))
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
315
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
316
    def emit_baseline(self, revid, ref):
        """Emit a full source tree of the first commit's parent.

        The revision is output as a commit whose file commands describe the
        whole tree, i.e. the changes relative to the null revision.

        :param revid: the id of the baseline revision
        :param ref: the ref the baseline commit is written to
        """
        revobj = self.branch.repository.get_revision(revid)
        # Allocate the mark the same way emit_commit does. A fixed mark of
        # 1 would clash with marks loaded from an import marks file; with
        # no marks loaded this still yields 1.
        mark = len(self.revid_to_mark) + 1
        self.revid_to_mark[revid] = mark
        file_cmds = self._get_filecommands(bzrlib.revision.NULL_REVISION, revid)
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.135.1 by Andy Grimm
Add --baseline option
323
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
324
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
325
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
326
            return
327
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
328
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
329
        try:
330
            revobj = self.branch.repository.get_revision(revid)
331
        except bazErrors.NoSuchRevision:
332
            # This is a ghost revision. Mark it as not found and next!
333
            self.revid_to_mark[revid] = -1
334
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
335
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
336
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
337
        # TODO: Consider the excluded revisions when deciding the parents.
338
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
339
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
340
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
341
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
342
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
343
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
344
            if ncommits:
345
                # This is a parentless commit but it's not the first one
346
                # output. We need to create a new temporary branch for it
347
                # otherwise git-fast-import will assume the previous commit
348
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
349
                ref = self._next_tmp_ref()
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
350
            parent = bzrlib.revision.NULL_REVISION
351
        else:
352
            parent = revobj.parent_ids[0]
353
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
354
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
355
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
356
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
357
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
358
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
359
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
360
        # Report progress and checkpoint if it's time for that
361
        self.report_progress(ncommits)
362
        if (self.checkpoint > 0 and ncommits
363
            and ncommits % self.checkpoint == 0):
364
            self.note("Exported %i commits - adding checkpoint to output"
365
                % ncommits)
366
            self._save_marks()
367
            self.print_cmd(commands.CheckpointCommand())
368
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
369
    def _get_name_email(self, user):
370
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
371
            # If the email isn't inside <>, we need to use it as the name
372
            # in order for things to round-trip correctly.
373
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
374
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
375
            email = ''
376
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
377
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
378
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
379
380
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the CommitCommand describing one revision.

        :param git_ref: the ref the commit is written to
        :param mark: the mark allocated to this commit
        :param revobj: the Revision object being exported
        :param file_cmds: the file commands making up the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            more_authors = revobj.get_apparent_authors()
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            parent_mark = self.revid_to_mark.get(p)
            # A ghost either has no mark at all or was recorded by
            # emit_commit with the placeholder mark -1; neither may be
            # referenced as a parent.
            if parent_mark is None or parent_mark == -1:
                continue
            non_ghost_parents.append(":%s" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Work on a copy so the Revision object's own properties are
            # left untouched.
            properties = dict(revobj.properties)
            for prop in self.properties_to_exclude:
                properties.pop(prop, None)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds), more_authors=more_author_info,
            properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
442
443
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
444
        try:
445
            tree_old = self.branch.repository.revision_tree(parent)
446
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
447
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
448
            # We can't find the old parent. Let's diff against his parent
449
            pp = self.branch.repository.get_revision(parent)
450
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
451
        tree_new = None
452
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
453
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
454
        except bazErrors.UnexpectedInventoryFormat:
455
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
456
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
457
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
458
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
459
    def _get_filecommands(self, parent, revision_id):
460
        """Get the list of FileCommands for the changes between two revisions."""
461
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
462
        if not(tree_old and tree_new):
463
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
464
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
465
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
466
        changes = tree_new.changes_from(tree_old)
467
468
        # Make "modified" have 3-tuples, as added does
469
        my_modified = [ x[0:3] for x in changes.modified ]
470
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
471
        # The potential interaction between renames and deletes is messy.
472
        # Handle it here ...
473
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
474
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
475
476
        # Map kind changes to a delete followed by an add
477
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
478
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
479
            # IGC: I don't understand why a delete is needed here.
480
            # In fact, it seems harmful? If you uncomment this line,
481
            # please file a bug explaining why you needed to.
482
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
483
            my_modified.append((path, id_, kind2))
484
485
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
486
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
487
            if kind == 'file':
488
                text = tree_new.get_file_text(id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
489
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
490
                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
491
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
492
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
493
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
494
                    helpers.kind_to_mode('symlink', False),
495
                    None, tree_new.get_symlink_target(id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
496
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
497
                if not self.plain_format:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
498
                    file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
499
                        helpers.kind_to_mode('directory', False),
500
                        None, None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
501
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
502
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
503
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
504
        return file_cmds
505
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
506
    def _process_renames_and_deletes(self, renames, deletes,
507
        revision_id, tree_old):
508
        file_cmds = []
509
        modifies = []
510
        renamed = []
511
512
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
513
        # In a nutshell, there are several nasty cases:
514
        #
515
        # 1) bzr rm a; bzr mv b a; bzr commit
516
        # 2) bzr mv x/y z; bzr rm x; commmit
517
        #
518
        # The first must come out with the delete first like this:
519
        #
520
        # D a
521
        # R b a
522
        #
523
        # The second case must come out with the rename first like this:
524
        #
525
        # R x/y z
526
        # D x
527
        #
528
        # So outputting all deletes first or all renames first won't work.
529
        # Instead, we need to make multiple passes over the various lists to
530
        # get the ordering right.
531
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
532
        must_be_renamed = {}
533
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
534
        deleted_paths = set([p for p, _, _ in deletes])
535
        for (oldpath, newpath, id_, kind,
536
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
537
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
538
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
539
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
540
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
541
                deleted_paths.remove(newpath)
542
            if (self.is_empty_dir(tree_old, oldpath)):
543
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
544
                    revision_id))
545
                continue
546
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
547
            #    revision_id)
548
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
549
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
550
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
551
                file_cmds.append(
552
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
553
            if text_modified or meta_modified:
554
                modifies.append((newpath, id_, kind))
555
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
556
            # Renaming a directory implies all children must be renamed.
557
            # Note: changes_from() doesn't handle this
0.64.329 by Jelmer Vernooij
Support exporting files that are changed into directories.
558
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
559
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
560
                    if e.kind == 'directory' and self.plain_format:
561
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
562
                    old_child_path = osutils.pathjoin(oldpath, p)
563
                    new_child_path = osutils.pathjoin(newpath, p)
564
                    must_be_renamed[old_child_path] = new_child_path
565
566
        # Add children not already renamed
567
        if must_be_renamed:
568
            renamed_already = set(old_to_new.keys())
569
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
570
            for old_child_path in sorted(still_to_be_renamed):
571
                new_child_path = must_be_renamed[old_child_path]
572
                if self.verbose:
573
                    self.note("implicitly renaming %s => %s" % (old_child_path,
574
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
575
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
576
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
577
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
578
        # Record remaining deletes
579
        for path, id_, kind in deletes:
580
            if path not in deleted_paths:
581
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
582
            if kind == 'directory' and self.plain_format:
583
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
584
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
585
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
586
        return file_cmds, modifies, renamed
587
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
588
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
589
        # If a previous rename is found, we should adjust the path
590
        for old, new in renamed:
591
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
592
                self.note("Changing path %s given rename to %s in revision %s"
593
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
594
                path = new
595
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
596
                self.note(
597
                    "Adjusting path %s given rename of %s to %s in revision %s"
598
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
599
                path = path.replace(old + "/", new + "/")
600
        return path
601
0.68.1 by Pieter de Bie
Classify bzr-fast-export
602
    def emit_tags(self):
        """Print a reset command for each tag whose revision has a mark.

        Tags pointing at a revision missing from self.revid_to_mark are
        reported with a warning and skipped.  In plain format, a ref name
        rejected by check_ref_format() is either rewritten with
        sanitize_ref_name_for_git() (when self.rewrite_tags is set) or
        skipped with a warning.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
            else:
                git_ref = 'refs/tags/%s' % tag.encode("utf-8")
                if self.plain_format and not check_ref_format(git_ref):
                    # Messages are formatted before being handed to
                    # self.warning(), like every other warning/note call in
                    # this class, so they do not depend on warning()
                    # interpolating extra positional arguments.
                    if self.rewrite_tags:
                        new_ref = sanitize_ref_name_for_git(git_ref)
                        self.warning(
                            'tag %r is exported as %r to be valid in git.'
                            % (git_ref, new_ref))
                        git_ref = new_ref
                    else:
                        self.warning(
                            'not creating tag %r as its name would not be '
                            'valid in git.' % (git_ref,))
                        continue
                self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
622
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
623
    def _next_tmp_ref(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
624
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
625
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
626
        if prefix not in self.branch_names:
627
            self.branch_names[prefix] = 0
628
        else:
629
            self.branch_names[prefix] += 1
630
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
631
        return 'refs/heads/%s' % prefix