/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
44
from __future__ import absolute_import
45
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
46
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
47
# is not updated (because the parent of commit is already merged, so we don't
48
# set new_git_branch to the previously used name)
49
6791.2.4 by Jelmer Vernooij
Fix python3isms.
50
try:
51
    from email.utils import parseaddr
52
except ImportError:  # python < 3
53
    from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
54
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
55
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
56
import breezy.branch
57
import breezy.revision
58
from ... import (
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
59
    builtins,
60
    errors as bazErrors,
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
61
    lazy_import,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
62
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
63
    progress,
64
    trace,
65
    )
7045.3.1 by Jelmer Vernooij
Fix another ~500 tests.
66
from ...sixish import (
67
    int2byte,
7045.4.18 by Jelmer Vernooij
Fix compatibility with older versions of fastimport.
68
    PY3,
7045.3.1 by Jelmer Vernooij
Fix another ~500 tests.
69
    viewitems,
70
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
71
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
72
from . import (
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
73
    helpers,
74
    marks_file,
75
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
76
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
77
lazy_import.lazy_import(globals(),
78
"""
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
79
from fastimport import commands
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
80
""")
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
81
82
83
def _get_output_stream(destination):
84
    if destination is None or destination == '-':
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
85
        return helpers.binary_stream(sys.stdout)
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
86
    elif destination.endswith('gz'):
87
        import gzip
88
        return gzip.open(destination, 'wb')
89
    else:
90
        return open(destination, 'wb')
91
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
92
# from dulwich.repo:
93
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check, as a bytestring
    :return: True if refname is valid, False otherwise
    """
    # The rules are tested one by one, in the same order as in [1], rather
    # than being folded into a single expression.
    if refname.startswith(b'.') or b'/.' in refname:
        return False
    if b'/' not in refname:
        return False
    if b'..' in refname:
        return False
    # bytearray yields integer byte values on both Python 2 and Python 3
    forbidden = bytearray(b'\177 ~^:?*[')
    for byte in bytearray(refname):
        if byte < 0o40 or byte in forbidden:
            return False
    if refname.endswith((b'/', b'.')):
        return False
    if refname.endswith(b'.lock'):
        return False
    if b'@{' in refname:
        return False
    if b'\\' in refname:
        return False
    return True
123
0.133.3 by Oleksandr Usov
Implement comments from patch review:
124
125
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format rejects is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking the uniqueness guarantees
    provided by bzr, so callers have to verify manually that the resulting
    ref names are unique.

    :param refname: refname to rewrite, as a bytestring
    :return: new refname, as a bytestring
    """
    return re.sub(
        # '/.' in refname or startswith '.'
        br"/\.|^\."
        # '..' in refname
        br"|\.\."
        # ord(c) < 040
        br"|[\000-\037]"
        # c in '\177 ~^:?*['
        br"|[\177 ~^:?*[]"
        # last char in "/."
        br"|[/.]$"
        # endswith '.lock'; the dot must be escaped, otherwise names such
        # as 'block' or 'unlock' are rewritten as well
        br"|\.lock$"
        # "@{" in refname
        br"|@{"
        # "\\" in refname
        br"|\\",
        b"_", refname)
0.64.173 by Ian Clatworthy
add -r option to fast-export
155
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
156
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
157
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
158
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
159
    def __init__(self, source, outf, ref=None, checkpoint=-1,
                 import_marks_file=None, export_marks_file=None,
                 revision=None, verbose=False, plain_format=False,
                 rewrite_tags=False, no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export
        :param outf: the stream that commands are written to
        :param ref: the ref passed along with every emitted commit
        :param checkpoint: emit a checkpoint command every this many
            commits; None or a value below 1 disables checkpoints
        :param import_marks_file: if set, the file to load marks from
        :param export_marks_file: if set, the file to save marks to
        :param revision: if set, the revision range to export
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True, the starting revision of the range is
            emitted first as a baseline commit
        """
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.revision = revision
        self.excluded_revisions = set()
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self.baseline = baseline
        self._multi_author_api_available = hasattr(
            breezy.revision.Revision, 'get_apparent_authors')
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        self.branch_names = {}
        if not self.import_marks_file:
            return
        imported = marks_file.import_marks(self.import_marks_file)
        if imported is not None:
            # The imported table is keyed by mark; invert it so it can be
            # looked up by revision id. Branch names are no longer
            # included in the marks file, so branch_names stays empty.
            self.revid_to_mark = {
                revid: mark for mark, revid in imported.items()}
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
210
0.64.173 by Ian Clatworthy
add -r option to fast-export
211
    def interesting_history(self):
212
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
213
            rev1, rev2 = builtins._get_revision_range(self.revision,
214
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
215
            start_rev_id = rev1.rev_id
216
            end_rev_id = rev2.rev_id
217
        else:
218
            start_rev_id = None
219
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
220
        self.note("Calculating the revisions to include ...")
0.64.341 by Jelmer Vernooij
Fix test, clarify help description for 'bzr fast-export'.
221
        view_revisions = [rev_id for rev_id, _, _, _ in
222
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
223
        view_revisions.reverse()
0.64.173 by Ian Clatworthy
add -r option to fast-export
224
        # If a starting point was given, we need to later check that we don't
225
        # start emitting revisions from before that point. Collect the
226
        # revisions to exclude now ...
227
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
228
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
229
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
230
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.135.2 by Andy Grimm
fix --baseline bugs, and add a couple of tests
231
            if self.baseline:
232
                # needed so the first relative commit knows its parent
233
                self.excluded_revisions.remove(start_rev_id)
234
                view_revisions.insert(0, start_rev_id)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
235
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
236
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
237
    def run(self):
238
        # Export the data
6754.8.4 by Jelmer Vernooij
Use new context stuff.
239
        with self.branch.repository.lock_read():
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
240
            interesting = self.interesting_history()
0.102.15 by Ian Clatworthy
add revision count to 'Starting export ...' message
241
            self._commit_total = len(interesting)
242
            self.note("Starting export of %d revisions ..." %
243
                self._commit_total)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
244
            if not self.plain_format:
245
                self.emit_features()
0.135.1 by Andy Grimm
Add --baseline option
246
            if self.baseline:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
247
                self.emit_baseline(interesting.pop(0), self.ref)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
248
            for revid in interesting:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
249
                self.emit_commit(revid, self.ref)
0.138.1 by Oleksandr Usov
Add --no-tags flag
250
            if self.branch.supports_tags() and not self.no_tags:
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
251
                self.emit_tags()
0.68.1 by Pieter de Bie
Classify bzr-fast-export
252
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
253
        # Save the marks if requested
0.79.2 by Ian Clatworthy
extend & use marks_file API
254
        self._save_marks()
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
255
        self.dump_stats()
256
257
    def note(self, msg, *args):
        """Log msg as a note, prefixed with the current time of day.

        :param msg: the message; args are handed on to trace.note with it
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
261
262
    def warning(self, msg, *args):
        """Log msg as a warning, prefixed with the current time of day.

        :param msg: the message; args are handed on to trace.warning with it
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
266
267
    def _time_of_day(self):
268
        """Time of day as a string."""
269
        # Note: this is a separate method so tests can patch in a fixed value
270
        return time.strftime("%H:%M:%S")
271
272
    def report_progress(self, commit_count, details=''):
273
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
274
            if self._commit_total:
275
                counts = "%d/%d" % (commit_count, self._commit_total)
276
            else:
277
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
278
            minutes = (time.time() - self._start_time) / 60
279
            rate = commit_count * 1.0 / minutes
280
            if rate > 10:
281
                rate_str = "at %.0f/minute " % rate
282
            else:
283
                rate_str = "at %.1f/minute " % rate
284
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
285
286
    def dump_stats(self):
        """Log the number of entries in revid_to_mark and the time taken."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rev_count = len(self.revid_to_mark)
        noun = helpers.single_plural(rev_count, "revision", "revisions")
        self.note("Exported %d %s in %s", rev_count, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
292
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
293
    def print_cmd(self, cmd):
        """Write cmd, followed by a newline, to the output stream.

        :param cmd: the command object to serialise
        """
        # Python 3 formats the command with %s, Python 2 with %r
        template = b"%s\n" if PY3 else b"%r\n"
        self.outf.write(template % cmd)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
298
0.79.2 by Ian Clatworthy
extend & use marks_file API
299
    def _save_marks(self):
300
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
301
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
302
            marks_file.export_marks(self.export_marks_file, revision_ids)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
303
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
304
    def is_empty_dir(self, tree, path):
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
305
        # Continue if path is not a directory
306
        try:
307
            if tree.kind(path) != 'directory':
308
                return False
309
        except bazErrors.NoSuchFile:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
310
            self.warning("Skipping empty_dir detection - no file_id for %s" %
311
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
312
            return False
313
314
        # Use treewalk to find the contents of our directory
315
        contents = list(tree.walkdirs(prefix=path))[0]
316
        if len(contents[1]) == 0:
317
            return True
318
        else:
319
            return False
320
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
321
    def emit_features(self):
        """Emit one feature command per name in commands.FEATURE_NAMES.

        The names are emitted in sorted order.
        """
        names = sorted(commands.FEATURE_NAMES)
        for name in names:
            self.print_cmd(commands.FeatureCommand(name))
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
324
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
325
    def emit_baseline(self, revid, ref):
        """Emit revid as the baseline commit, using mark 1.

        :param revid: id of the revision to use as the baseline
        :param ref: the ref the commit is emitted on
        """
        # Emit a full source tree of the first commit's parent
        repository = self.branch.repository
        revobj = repository.get_revision(revid)
        baseline_mark = 1
        self.revid_to_mark[revid] = baseline_mark
        # The file commands are computed against the null revision
        file_cmds = self._get_filecommands(
            breezy.revision.NULL_REVISION, revid)
        commit = self._get_commit_command(
            ref, baseline_mark, revobj, file_cmds)
        self.print_cmd(commit)
0.135.1 by Andy Grimm
Add --baseline option
332
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
333
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
334
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
335
            return
336
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
337
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
338
        try:
339
            revobj = self.branch.repository.get_revision(revid)
340
        except bazErrors.NoSuchRevision:
341
            # This is a ghost revision. Mark it as not found and next!
342
            self.revid_to_mark[revid] = -1
343
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
344
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
345
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
346
        # TODO: Consider the excluded revisions when deciding the parents.
347
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
348
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
349
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
350
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
351
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
352
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
353
            if ncommits:
354
                # This is a parentless commit but it's not the first one
355
                # output. We need to create a new temporary branch for it
356
                # otherwise git-fast-import will assume the previous commit
357
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
358
                ref = self._next_tmp_ref()
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
359
            parent = breezy.revision.NULL_REVISION
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
360
        else:
361
            parent = revobj.parent_ids[0]
362
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
363
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
364
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
365
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
366
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
367
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
368
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
369
        # Report progress and checkpoint if it's time for that
370
        self.report_progress(ncommits)
7045.4.8 by Jelmer Vernooij
Fix another 128 tests on python 3.
371
        if (self.checkpoint is not None and self.checkpoint > 0 and ncommits
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
372
            and ncommits % self.checkpoint == 0):
373
            self.note("Exported %i commits - adding checkpoint to output"
374
                % ncommits)
375
            self._save_marks()
376
            self.print_cmd(commands.CheckpointCommand())
377
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
378
    def _get_name_email(self, user):
379
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
380
            # If the email isn't inside <>, we need to use it as the name
381
            # in order for things to round-trip correctly.
382
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
383
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
384
            email = ''
385
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
386
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
387
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
388
389
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the commit command for a revision.

        :param git_ref: the ref to commit to
        :param mark: the mark to give the commit
        :param revobj: the Revision object being exported; it is not
            modified
        :param file_cmds: iterable of the file commands for the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            # Copy the list so that popping the primary author cannot
            # disturb whatever the revision handed out.
            more_authors = list(revobj.get_apparent_authors())
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            try:
                parent_mark = self.revid_to_mark[p]
            except KeyError:
                # ghost - ignore
                continue
            non_ghost_parents.append(b":%d" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Build a filtered copy: deleting keys from revobj.properties
            # would alter the Revision object itself.
            properties = dict(
                (key, value) for key, value in revobj.properties.items()
                if key not in self.properties_to_exclude)

        # Build and return the result
        return commands.CommitCommand(
            git_ref, mark, author_info, committer_info,
            revobj.message.encode("utf-8"), from_, merges, iter(file_cmds),
            more_authors=more_author_info, properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
451
452
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
453
        try:
454
            tree_old = self.branch.repository.revision_tree(parent)
455
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
456
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
457
            # We can't find the old parent. Let's diff against his parent
458
            pp = self.branch.repository.get_revision(parent)
459
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
460
        tree_new = None
461
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
462
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
463
        except bazErrors.UnexpectedInventoryFormat:
464
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
465
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
466
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
467
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
468
    def _get_filecommands(self, parent, revision_id):
469
        """Get the list of FileCommands for the changes between two revisions."""
470
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
471
        if not(tree_old and tree_new):
472
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
473
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
474
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
475
        changes = tree_new.changes_from(tree_old)
476
477
        # Make "modified" have 3-tuples, as added does
478
        my_modified = [ x[0:3] for x in changes.modified ]
479
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
480
        # The potential interaction between renames and deletes is messy.
481
        # Handle it here ...
482
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
483
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
484
485
        # Map kind changes to a delete followed by an add
486
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
487
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
488
            # IGC: I don't understand why a delete is needed here.
489
            # In fact, it seems harmful? If you uncomment this line,
490
            # please file a bug explaining why you needed to.
491
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
492
            my_modified.append((path, id_, kind2))
493
494
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
495
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
496
            if kind == 'file':
6809.4.5 by Jelmer Vernooij
Swap arguments for get_file_*.
497
                text = tree_new.get_file_text(path, id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
498
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
6809.4.21 by Jelmer Vernooij
Fix long lines.
499
                    helpers.kind_to_mode(
500
                        'file', tree_new.is_executable(path, id_)),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
501
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
502
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
503
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
504
                    helpers.kind_to_mode('symlink', False),
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
505
                    None, tree_new.get_symlink_target(path, id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
506
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
507
                if not self.plain_format:
6809.4.21 by Jelmer Vernooij
Fix long lines.
508
                    file_cmds.append(
509
                            commands.FileModifyCommand(path.encode("utf-8"),
510
                                helpers.kind_to_mode('directory', False), None,
511
                                None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
512
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
513
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
514
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
515
        return file_cmds
516
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
517
    def _process_renames_and_deletes(self, renames, deletes,
518
        revision_id, tree_old):
519
        file_cmds = []
520
        modifies = []
521
        renamed = []
522
523
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
524
        # In a nutshell, there are several nasty cases:
525
        #
526
        # 1) bzr rm a; bzr mv b a; bzr commit
527
        # 2) bzr mv x/y z; bzr rm x; commmit
528
        #
529
        # The first must come out with the delete first like this:
530
        #
531
        # D a
532
        # R b a
533
        #
534
        # The second case must come out with the rename first like this:
535
        #
536
        # R x/y z
537
        # D x
538
        #
539
        # So outputting all deletes first or all renames first won't work.
540
        # Instead, we need to make multiple passes over the various lists to
541
        # get the ordering right.
542
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
543
        must_be_renamed = {}
544
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
545
        deleted_paths = set([p for p, _, _ in deletes])
546
        for (oldpath, newpath, id_, kind,
547
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
548
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
549
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
550
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
551
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
552
                deleted_paths.remove(newpath)
553
            if (self.is_empty_dir(tree_old, oldpath)):
554
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
555
                    revision_id))
556
                continue
557
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
558
            #    revision_id)
559
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
560
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
561
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
562
                file_cmds.append(
563
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
564
            if text_modified or meta_modified:
565
                modifies.append((newpath, id_, kind))
566
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
567
            # Renaming a directory implies all children must be renamed.
568
            # Note: changes_from() doesn't handle this
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
569
            if kind == 'directory' and tree_old.kind(oldpath, id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
570
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
571
                    if e.kind == 'directory' and self.plain_format:
572
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
573
                    old_child_path = osutils.pathjoin(oldpath, p)
574
                    new_child_path = osutils.pathjoin(newpath, p)
575
                    must_be_renamed[old_child_path] = new_child_path
576
577
        # Add children not already renamed
578
        if must_be_renamed:
579
            renamed_already = set(old_to_new.keys())
580
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
581
            for old_child_path in sorted(still_to_be_renamed):
582
                new_child_path = must_be_renamed[old_child_path]
583
                if self.verbose:
584
                    self.note("implicitly renaming %s => %s" % (old_child_path,
585
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
586
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
587
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
588
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
589
        # Record remaining deletes
590
        for path, id_, kind in deletes:
591
            if path not in deleted_paths:
592
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
593
            if kind == 'directory' and self.plain_format:
594
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
595
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
596
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
597
        return file_cmds, modifies, renamed
598
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
599
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
600
        # If a previous rename is found, we should adjust the path
601
        for old, new in renamed:
602
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
603
                self.note("Changing path %s given rename to %s in revision %s"
604
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
605
                path = new
606
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
607
                self.note(
608
                    "Adjusting path %s given rename of %s to %s in revision %s"
609
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
610
                path = path.replace(old + "/", new + "/")
611
        return path
612
0.68.1 by Pieter de Bie
Classify bzr-fast-export
613
    def emit_tags(self):
        """Emit a reset command for every tag whose revision was exported.

        Tags pointing at a revision with no known mark are skipped with a
        warning. In plain format, a tag whose ref name fails
        check_ref_format() is either rewritten via
        sanitize_ref_name_for_git() (when rewrite_tags is set) or skipped
        with a warning.
        """
        for tag, revid in viewitems(self.branch.tags.get_tag_dict()):
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning(
                    'not creating tag %r pointing to non-existent revision %s'
                    % (tag, revid))
                continue

            git_ref = b'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                if not self.rewrite_tags:
                    self.warning(
                        'not creating tag %r as its name would not be '
                        'valid in git.', git_ref)
                    continue
                sanitized_ref = sanitize_ref_name_for_git(git_ref)
                self.warning(
                    'tag %r is exported as %r to be valid in git.',
                    git_ref, sanitized_ref)
                git_ref = sanitized_ref
            self.print_cmd(commands.ResetCommand(git_ref, b":%d" % mark))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
633
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
634
    def _next_tmp_ref(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
635
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
636
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
637
        if prefix not in self.branch_names:
638
            self.branch_names[prefix] = 0
639
        else:
640
            self.branch_names[prefix] += 1
641
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
642
        return 'refs/heads/%s' % prefix