/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
44
from __future__ import absolute_import
45
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
46
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
47
# is not updated (because the parent of commit is already merged, so we don't
48
# set new_git_branch to the previously used name)
49
6791.2.4 by Jelmer Vernooij
Fix python3isms.
50
try:
51
    from email.utils import parseaddr
52
except ImportError:  # python < 3
53
    from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
54
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
55
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
56
import breezy.branch
57
import breezy.revision
58
from ... import (
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
59
    builtins,
60
    errors as bazErrors,
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
61
    lazy_import,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
62
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
63
    progress,
64
    trace,
65
    )
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
66
from ...sixish import int2byte
0.79.4 by Ian Clatworthy
use note and warning APIs
67
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
68
from . import (
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
69
    helpers,
70
    marks_file,
71
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
72
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
73
lazy_import.lazy_import(globals(),
74
"""
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
75
from fastimport import commands
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
76
""")
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
77
78
79
def _get_output_stream(destination):
80
    if destination is None or destination == '-':
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
81
        return helpers.binary_stream(sys.stdout)
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
82
    elif destination.endswith('gz'):
83
        import gzip
84
        return gzip.open(destination, 'wb')
85
    else:
86
        return open(destination, 'wb')
87
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
88
# from dulwich.repo:
89
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check, as a bytestring
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed separately
    # to parallel [1].
    if b'/.' in refname or refname.startswith(b'.'):
        return False
    if b'/' not in refname:
        return False
    if b'..' in refname:
        return False
    # Iterating a bytearray yields integer byte values on both Python 2 and
    # Python 3, so no index/slice/ord() juggling is needed.
    forbidden = bytearray(b'\177 ~^:?*[')
    for byte in bytearray(refname):
        if byte < 0o40 or byte in forbidden:
            return False
    # refname is known to be non-empty here: it contains at least one b'/'.
    if refname.endswith((b'/', b'.')):
        return False
    if refname.endswith(b'.lock'):
        return False
    if b'@{' in refname:
        return False
    if b'\\' in refname:
        return False
    return True
119
0.133.3 by Oleksandr Usov
Implement comments from patch review:
120
121
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Each offending sequence matched by the pattern below is replaced by a
    single underscore.  For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are unique.

    :param refname: refname to rewrite, as a bytestring
    :return: new refname
    """
    new_refname = re.sub(
        # '/.' in refname or startswith '.'
        br"/\.|^\."
        # '..' in refname
        br"|\.\."
        # ord(c) < 040
        br"|[\x00-\x1f]"
        # c in '\177 ~^:?*['
        br"|[\177 ~^:?*[]"
        # last char in "/."
        br"|[/.]$"
        # endswith '.lock' -- the dot has to be escaped, otherwise any
        # five-character tail ending in 'lock' (e.g. 'clock') is rewritten
        br"|\.lock$"
        # "@{" in refname
        br"|@{"
        # "\\" in refname
        br"|\\",
        b"_", refname)
    return new_refname
0.64.173 by Ian Clatworthy
add -r option to fast-export
151
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
152
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
153
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
154
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
155
    def __init__(self, source, outf, ref=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False,
        no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export (kept as self.branch)
        :param outf: stream that the emitted commands are written to
        :param ref: ref handed to every emitted commit
        :param checkpoint: when greater than zero, a checkpoint is emitted
            (and the marks saved) every that many commits
        :param import_marks_file: if set, marks are loaded from it
        :param export_marks_file: if set, marks are saved to it
        :param revision: optional revision range limiting the export
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True and a start revision is given, that
            revision is emitted first as a full-tree baseline commit
        """
        # What to export and where to
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.revision = revision
        self.excluded_revisions = set()

        # Output options
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self.baseline = baseline
        self._multi_author_api_available = hasattr(
            breezy.revision.Revision, 'get_apparent_authors')
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        self.branch_names = {}
        if not self.import_marks_file:
            return
        marks_info = marks_file.import_marks(self.import_marks_file)
        if marks_info is None:
            return
        # The marks file maps mark -> revision id; we look up by revision id.
        # Branch names are no longer included in the marks file.
        self.revid_to_mark = {
            revid: mark for mark, revid in marks_info.items()}
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
206
0.64.173 by Ian Clatworthy
add -r option to fast-export
207
    def interesting_history(self):
208
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
209
            rev1, rev2 = builtins._get_revision_range(self.revision,
210
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
211
            start_rev_id = rev1.rev_id
212
            end_rev_id = rev2.rev_id
213
        else:
214
            start_rev_id = None
215
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
216
        self.note("Calculating the revisions to include ...")
0.64.341 by Jelmer Vernooij
Fix test, clarify help description for 'bzr fast-export'.
217
        view_revisions = [rev_id for rev_id, _, _, _ in
218
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
219
        view_revisions.reverse()
0.64.173 by Ian Clatworthy
add -r option to fast-export
220
        # If a starting point was given, we need to later check that we don't
221
        # start emitting revisions from before that point. Collect the
222
        # revisions to exclude now ...
223
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
224
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
225
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
226
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.135.2 by Andy Grimm
fix --baseline bugs, and add a couple of tests
227
            if self.baseline:
228
                # needed so the first relative commit knows its parent
229
                self.excluded_revisions.remove(start_rev_id)
230
                view_revisions.insert(0, start_rev_id)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
231
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
232
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
233
    def run(self):
234
        # Export the data
6754.8.4 by Jelmer Vernooij
Use new context stuff.
235
        with self.branch.repository.lock_read():
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
236
            interesting = self.interesting_history()
0.102.15 by Ian Clatworthy
add revision count to 'Starting export ...' message
237
            self._commit_total = len(interesting)
238
            self.note("Starting export of %d revisions ..." %
239
                self._commit_total)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
240
            if not self.plain_format:
241
                self.emit_features()
0.135.1 by Andy Grimm
Add --baseline option
242
            if self.baseline:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
243
                self.emit_baseline(interesting.pop(0), self.ref)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
244
            for revid in interesting:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
245
                self.emit_commit(revid, self.ref)
0.138.1 by Oleksandr Usov
Add --no-tags flag
246
            if self.branch.supports_tags() and not self.no_tags:
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
247
                self.emit_tags()
0.68.1 by Pieter de Bie
Classify bzr-fast-export
248
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
249
        # Save the marks if requested
0.79.2 by Ian Clatworthy
extend & use marks_file API
250
        self._save_marks()
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
251
        self.dump_stats()
252
253
    def note(self, msg, *args):
        """Output a note, prefixed with the time of day.

        :param msg: the message (may contain %-style placeholders)
        :param args: arguments passed through to trace.note
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
257
258
    def warning(self, msg, *args):
        """Output a warning, prefixed with the time of day.

        :param msg: the message (may contain %-style placeholders)
        :param args: arguments passed through to trace.warning
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
262
263
    def _time_of_day(self):
264
        """Time of day as a string."""
265
        # Note: this is a separate method so tests can patch in a fixed value
266
        return time.strftime("%H:%M:%S")
267
268
    def report_progress(self, commit_count, details=''):
269
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
270
            if self._commit_total:
271
                counts = "%d/%d" % (commit_count, self._commit_total)
272
            else:
273
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
274
            minutes = (time.time() - self._start_time) / 60
275
            rate = commit_count * 1.0 / minutes
276
            if rate > 10:
277
                rate_str = "at %.0f/minute " % rate
278
            else:
279
                rate_str = "at %.1f/minute " % rate
280
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
281
282
    def dump_stats(self):
        """Note how many revisions have a mark and how long the run took."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = helpers.single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
288
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
289
    def print_cmd(self, cmd):
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
290
        self.outf.write(b"%r\n" % cmd)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
291
0.79.2 by Ian Clatworthy
extend & use marks_file API
292
    def _save_marks(self):
293
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
294
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
295
            marks_file.export_marks(self.export_marks_file, revision_ids)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
296
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
297
    def is_empty_dir(self, tree, path):
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
298
        # Continue if path is not a directory
299
        try:
300
            if tree.kind(path) != 'directory':
301
                return False
302
        except bazErrors.NoSuchFile:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
303
            self.warning("Skipping empty_dir detection - no file_id for %s" %
304
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
305
            return False
306
307
        # Use treewalk to find the contents of our directory
308
        contents = list(tree.walkdirs(prefix=path))[0]
309
        if len(contents[1]) == 0:
310
            return True
311
        else:
312
            return False
313
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
314
    def emit_features(self):
        """Emit a feature command for each name in commands.FEATURE_NAMES.

        The names are emitted in sorted order.
        """
        feature_names = sorted(commands.FEATURE_NAMES)
        for name in feature_names:
            self.print_cmd(commands.FeatureCommand(name))
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
317
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
318
    def emit_baseline(self, revid, ref):
        """Emit revid as a commit carrying its complete tree.

        The file commands are computed against the null revision, and the
        commit is recorded under mark 1.

        :param revid: revision id of the baseline revision
        :param ref: ref to emit the commit on
        """
        # Emit a full source tree of the first commit's parent
        repository = self.branch.repository
        revobj = repository.get_revision(revid)
        baseline_mark = 1
        self.revid_to_mark[revid] = baseline_mark
        file_cmds = self._get_filecommands(
            breezy.revision.NULL_REVISION, revid)
        commit_cmd = self._get_commit_command(
            ref, baseline_mark, revobj, file_cmds)
        self.print_cmd(commit_cmd)
0.135.1 by Andy Grimm
Add --baseline option
325
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
326
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
327
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
328
            return
329
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
330
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
331
        try:
332
            revobj = self.branch.repository.get_revision(revid)
333
        except bazErrors.NoSuchRevision:
334
            # This is a ghost revision. Mark it as not found and next!
335
            self.revid_to_mark[revid] = -1
336
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
337
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
338
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
339
        # TODO: Consider the excluded revisions when deciding the parents.
340
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
341
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
342
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
343
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
344
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
345
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
346
            if ncommits:
347
                # This is a parentless commit but it's not the first one
348
                # output. We need to create a new temporary branch for it
349
                # otherwise git-fast-import will assume the previous commit
350
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
351
                ref = self._next_tmp_ref()
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
352
            parent = breezy.revision.NULL_REVISION
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
353
        else:
354
            parent = revobj.parent_ids[0]
355
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
356
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
357
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
358
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
359
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
360
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
361
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
362
        # Report progress and checkpoint if it's time for that
363
        self.report_progress(ncommits)
364
        if (self.checkpoint > 0 and ncommits
365
            and ncommits % self.checkpoint == 0):
366
            self.note("Exported %i commits - adding checkpoint to output"
367
                % ncommits)
368
            self._save_marks()
369
            self.print_cmd(commands.CheckpointCommand())
370
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
371
    def _get_name_email(self, user):
372
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
373
            # If the email isn't inside <>, we need to use it as the name
374
            # in order for things to round-trip correctly.
375
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
376
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
377
            email = ''
378
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
379
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
380
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
381
382
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the commit command describing one revision.

        :param git_ref: ref to emit the commit on
        :param mark: mark assigned to the commit
        :param revobj: the Revision object being exported; it is not
            modified
        :param file_cmds: iterable of the file commands for the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            # Slice instead of pop() so that the list handed out by the
            # revision object is left untouched.
            authors = revobj.get_apparent_authors()
            author = authors[0]
            more_authors = authors[1:]
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            try:
                parent_mark = self.revid_to_mark[p]
            except KeyError:
                # ghost - ignore
                continue
            if parent_mark == -1:
                # emit_commit records ghosts it ran into with mark -1;
                # ":-1" is not a usable mark reference, so ignore these too.
                continue
            non_ghost_parents.append(b":%d" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Build a filtered copy: deleting keys from revobj.properties
            # itself would strip the author information from the revision
            # object for anyone who uses it afterwards.
            properties = {
                key: value for key, value in revobj.properties.items()
                if key not in self.properties_to_exclude}

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds), more_authors=more_author_info,
            properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
444
445
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
446
        try:
447
            tree_old = self.branch.repository.revision_tree(parent)
448
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
449
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
450
            # We can't find the old parent. Let's diff against his parent
451
            pp = self.branch.repository.get_revision(parent)
452
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
453
        tree_new = None
454
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
455
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
456
        except bazErrors.UnexpectedInventoryFormat:
457
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
458
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
459
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
460
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
461
    def _get_filecommands(self, parent, revision_id):
462
        """Get the list of FileCommands for the changes between two revisions."""
463
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
464
        if not(tree_old and tree_new):
465
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
466
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
467
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
468
        changes = tree_new.changes_from(tree_old)
469
470
        # Make "modified" have 3-tuples, as added does
471
        my_modified = [ x[0:3] for x in changes.modified ]
472
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
473
        # The potential interaction between renames and deletes is messy.
474
        # Handle it here ...
475
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
476
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
477
478
        # Map kind changes to a delete followed by an add
479
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
480
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
481
            # IGC: I don't understand why a delete is needed here.
482
            # In fact, it seems harmful? If you uncomment this line,
483
            # please file a bug explaining why you needed to.
484
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
485
            my_modified.append((path, id_, kind2))
486
487
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
488
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
489
            if kind == 'file':
6809.4.5 by Jelmer Vernooij
Swap arguments for get_file_*.
490
                text = tree_new.get_file_text(path, id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
491
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
6809.4.21 by Jelmer Vernooij
Fix long lines.
492
                    helpers.kind_to_mode(
493
                        'file', tree_new.is_executable(path, id_)),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
494
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
495
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
496
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
497
                    helpers.kind_to_mode('symlink', False),
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
498
                    None, tree_new.get_symlink_target(path, id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
499
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
500
                if not self.plain_format:
6809.4.21 by Jelmer Vernooij
Fix long lines.
501
                    file_cmds.append(
502
                            commands.FileModifyCommand(path.encode("utf-8"),
503
                                helpers.kind_to_mode('directory', False), None,
504
                                None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
505
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
506
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
507
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
508
        return file_cmds
509
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
510
    def _process_renames_and_deletes(self, renames, deletes,
511
        revision_id, tree_old):
512
        file_cmds = []
513
        modifies = []
514
        renamed = []
515
516
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
517
        # In a nutshell, there are several nasty cases:
518
        #
519
        # 1) bzr rm a; bzr mv b a; bzr commit
520
        # 2) bzr mv x/y z; bzr rm x; commmit
521
        #
522
        # The first must come out with the delete first like this:
523
        #
524
        # D a
525
        # R b a
526
        #
527
        # The second case must come out with the rename first like this:
528
        #
529
        # R x/y z
530
        # D x
531
        #
532
        # So outputting all deletes first or all renames first won't work.
533
        # Instead, we need to make multiple passes over the various lists to
534
        # get the ordering right.
535
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
536
        must_be_renamed = {}
537
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
538
        deleted_paths = set([p for p, _, _ in deletes])
539
        for (oldpath, newpath, id_, kind,
540
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
541
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
542
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
543
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
544
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
545
                deleted_paths.remove(newpath)
546
            if (self.is_empty_dir(tree_old, oldpath)):
547
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
548
                    revision_id))
549
                continue
550
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
551
            #    revision_id)
552
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
553
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
554
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
555
                file_cmds.append(
556
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
557
            if text_modified or meta_modified:
558
                modifies.append((newpath, id_, kind))
559
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
560
            # Renaming a directory implies all children must be renamed.
561
            # Note: changes_from() doesn't handle this
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
562
            if kind == 'directory' and tree_old.kind(oldpath, id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
563
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
564
                    if e.kind == 'directory' and self.plain_format:
565
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
566
                    old_child_path = osutils.pathjoin(oldpath, p)
567
                    new_child_path = osutils.pathjoin(newpath, p)
568
                    must_be_renamed[old_child_path] = new_child_path
569
570
        # Add children not already renamed
571
        if must_be_renamed:
572
            renamed_already = set(old_to_new.keys())
573
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
574
            for old_child_path in sorted(still_to_be_renamed):
575
                new_child_path = must_be_renamed[old_child_path]
576
                if self.verbose:
577
                    self.note("implicitly renaming %s => %s" % (old_child_path,
578
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
579
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
580
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
581
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
582
        # Record remaining deletes
583
        for path, id_, kind in deletes:
584
            if path not in deleted_paths:
585
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
586
            if kind == 'directory' and self.plain_format:
587
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
588
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
589
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
590
        return file_cmds, modifies, renamed
591
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
592
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
593
        # If a previous rename is found, we should adjust the path
594
        for old, new in renamed:
595
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
596
                self.note("Changing path %s given rename to %s in revision %s"
597
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
598
                path = new
599
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
600
                self.note(
601
                    "Adjusting path %s given rename of %s to %s in revision %s"
602
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
603
                path = path.replace(old + "/", new + "/")
604
        return path
605
0.68.1 by Pieter de Bie
Classify bzr-fast-export
606
    def emit_tags(self):
        """Print a reset command for each tag whose revision has a mark.

        Tags pointing at a revision with no mark are reported and skipped.
        In plain format, a tag whose ref fails check_ref_format() is
        either rewritten with sanitize_ref_name_for_git() (when
        ``rewrite_tags`` is set) or reported and skipped.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
                continue
            git_ref = b'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                if not self.rewrite_tags:
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.', git_ref)
                    continue
                new_ref = sanitize_ref_name_for_git(git_ref)
                self.warning('tag %r is exported as %r to be valid in git.',
                             git_ref, new_ref)
                git_ref = new_ref
            self.print_cmd(commands.ResetCommand(git_ref, b":%d" % mark))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
626
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
627
    def _next_tmp_ref(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
628
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
629
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
630
        if prefix not in self.branch_names:
631
            self.branch_names[prefix] = 0
632
        else:
633
            self.branch_names[prefix] += 1
634
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
635
        return 'refs/heads/%s' % prefix