/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
44
from __future__ import absolute_import
45
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
46
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
47
# is not updated (because the parent of commit is already merged, so we don't
48
# set new_git_branch to the previously used name)
49
6791.2.4 by Jelmer Vernooij
Fix python3isms.
50
try:
51
    from email.utils import parseaddr
52
except ImportError:  # python < 3
53
    from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
54
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
55
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
56
import breezy.branch
57
import breezy.revision
58
from ... import (
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
59
    builtins,
60
    errors as bazErrors,
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
61
    lazy_import,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
62
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
63
    progress,
64
    trace,
65
    )
7045.3.1 by Jelmer Vernooij
Fix another ~500 tests.
66
from ...sixish import (
67
    int2byte,
68
    viewitems,
69
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
70
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
71
from . import (
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
72
    helpers,
73
    marks_file,
74
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
75
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
76
lazy_import.lazy_import(globals(),
77
"""
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
78
from fastimport import commands
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
79
""")
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
80
81
82
def _get_output_stream(destination):
83
    if destination is None or destination == '-':
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
84
        return helpers.binary_stream(sys.stdout)
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
85
    elif destination.endswith('gz'):
86
        import gzip
87
        return gzip.open(destination, 'wb')
88
    else:
89
        return open(destination, 'wb')
90
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
91
# from dulwich.repo:
92
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check, as a byte string
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if b'/.' in refname or refname.startswith(b'.'):
        return False
    if b'/' not in refname:
        return False
    if b'..' in refname:
        return False
    # Iterating a bytearray yields integers on both Python 2 and Python 3,
    # so no per-byte slicing or ord() call is needed.
    forbidden = bytearray(b'\177 ~^:?*[')
    for byte in bytearray(refname):
        if byte < 0o40 or byte in forbidden:
            return False
    # refname is known to be non-empty here because it contains b'/'.
    if refname.endswith((b'/', b'.')):
        return False
    if refname.endswith(b'.lock'):
        return False
    if b'@{' in refname:
        return False
    if b'\\' in refname:
        return False
    return True
122
0.133.3 by Oleksandr Usov
Implement comments from patch review:
123
124
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Each sequence matched by the pattern below is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are unique.

    :param refname: refname to rewrite, as a byte string
    :return: new refname
    """
    return re.sub(
        # '/.' in refname or startswith '.'
        br"/\.|^\."
        # '..' in refname
        br"|\.\."
        # ord(c) < 040
        br"|[\000-\037]"
        # c in '\177 ~^:?*['
        br"|[\177 ~^:?*[]"
        # last char in "/."
        br"|[/.]$"
        # endswith '.lock' -- the dot must be escaped, otherwise any name
        # ending in "<char>lock" (e.g. "unlock") would be rewritten
        br"|\.lock$"
        # "@{" in refname
        br"|@{"
        # "\\" in refname
        br"|\\",
        b"_", refname)
0.64.173 by Ian Clatworthy
add -r option to fast-export
154
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
155
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
156
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
157
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
158
    def __init__(self, source, outf, ref=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False,
        no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export; kept as self.branch
        :param outf: stream that print_cmd() writes each command to
        :param ref: ref name handed to emit_baseline() and emit_commit()
        :param checkpoint: when greater than zero, emit_commit() saves the
            marks and writes a checkpoint command every that many commits
        :param import_marks_file: if set, existing marks are loaded from it
            through marks_file.import_marks()
        :param export_marks_file: if set, marks are written to it through
            marks_file.export_marks()
        :param revision: if set, it is handed to
            builtins._get_revision_range() to limit what is exported
        :param verbose: if True, progress is reported every 100 commits
            rather than every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True, run() passes the first interesting
            revision to emit_baseline() before emitting ordinary commits
        """
        # What is exported, and where it is written
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.revision = revision
        self.excluded_revisions = set()
        self.baseline = baseline

        # Output format switches
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self.properties_to_exclude = ['authors', 'author']
        self._multi_author_api_available = hasattr(
            breezy.revision.Revision, 'get_apparent_authors')

        # Marks and checkpoint settings
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly. Branch names
        # are no longer included in the marks file, so branch_names
        # always starts out empty.
        self.revid_to_mark = {}
        self.branch_names = {}
        if self.import_marks_file:
            marks_info = marks_file.import_marks(self.import_marks_file)
            if marks_info is not None:
                # The marks file maps mark -> revid; invert it.
                self.revid_to_mark = {
                    revid: mark for mark, revid in marks_info.items()}
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
209
0.64.173 by Ian Clatworthy
add -r option to fast-export
210
    def interesting_history(self):
        """Return the list of revision ids that run() should emit.

        The ids are those yielded by branch.iter_merge_sorted_revisions()
        for the requested range, in reversed order. When a start revision
        is given, every revision yielded by
        iter_merge_sorted_revisions(start_rev_id) is stored in
        self.excluded_revisions. With baseline set, the start revision
        itself is taken back out of that set and placed first in the
        result.

        :return: a new list of revision ids
        """
        start_rev_id = end_rev_id = None
        if self.revision:
            rev1, rev2 = builtins._get_revision_range(self.revision,
                self.branch, "fast-export")
            start_rev_id = rev1.rev_id
            end_rev_id = rev2.rev_id

        self.note("Calculating the revisions to include ...")
        merge_sorted = self.branch.iter_merge_sorted_revisions(
            end_rev_id, start_rev_id)
        view_revisions = [rev_id for rev_id, _, _, _ in merge_sorted]
        view_revisions.reverse()

        # If a starting point was given, we need to later check that we don't
        # start emitting revisions from before that point. Collect the
        # revisions to exclude now ...
        if start_rev_id is not None:
            self.note("Calculating the revisions to exclude ...")
            before_start = self.branch.iter_merge_sorted_revisions(
                start_rev_id)
            self.excluded_revisions = set(
                rev_id for rev_id, _, _, _ in before_start)
            if self.baseline:
                # needed so the first relative commit knows its parent
                self.excluded_revisions.remove(start_rev_id)
                view_revisions.insert(0, start_rev_id)
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
235
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
236
    def run(self):
        """Run the export: emit all commands, save marks, report stats.

        The repository stays read-locked while the commands are emitted.
        Marks are saved and statistics are noted after the lock is
        released.
        """
        # Export the data
        with self.branch.repository.lock_read():
            revids = self.interesting_history()
            self._commit_total = len(revids)
            self.note("Starting export of %d revisions ..." %
                self._commit_total)
            if not self.plain_format:
                self.emit_features()
            if self.baseline:
                # The first interesting revision becomes the baseline
                # and is not emitted again as an ordinary commit.
                baseline_revid = revids.pop(0)
                self.emit_baseline(baseline_revid, self.ref)
            for revid in revids:
                self.emit_commit(revid, self.ref)
            if self.branch.supports_tags() and not self.no_tags:
                self.emit_tags()

        # Save the marks if requested
        self._save_marks()
        self.dump_stats()
255
256
    def note(self, msg, *args):
        """Pass msg to trace.note(), prefixed with the time of day.

        :param msg: the message (may contain format placeholders)
        :param args: extra arguments forwarded to trace.note()
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
260
261
    def warning(self, msg, *args):
        """Pass msg to trace.warning(), prefixed with time and 'WARNING:'.

        :param msg: the message (may contain format placeholders)
        :param args: extra arguments forwarded to trace.warning()
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
265
266
    def _time_of_day(self):
267
        """Time of day as a string."""
268
        # Note: this is a separate method so tests can patch in a fixed value
269
        return time.strftime("%H:%M:%S")
270
271
    def report_progress(self, commit_count, details=''):
272
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
273
            if self._commit_total:
274
                counts = "%d/%d" % (commit_count, self._commit_total)
275
            else:
276
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
277
            minutes = (time.time() - self._start_time) / 60
278
            rate = commit_count * 1.0 / minutes
279
            if rate > 10:
280
                rate_str = "at %.0f/minute " % rate
281
            else:
282
                rate_str = "at %.1f/minute " % rate
283
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
284
285
    def dump_stats(self):
        """Note the number of entries in revid_to_mark and the elapsed time."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = helpers.single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
291
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
292
    def print_cmd(self, cmd):
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
293
        self.outf.write(b"%r\n" % cmd)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
294
0.79.2 by Ian Clatworthy
extend & use marks_file API
295
    def _save_marks(self):
296
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
297
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
298
            marks_file.export_marks(self.export_marks_file, revision_ids)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
299
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
300
    def is_empty_dir(self, tree, path):
        """Tell whether path is a directory without entries in tree.

        :param tree: object providing kind(path) and walkdirs(prefix=...)
        :param path: the path to inspect
        :return: True if path is a directory and the first item yielded by
            tree.walkdirs(prefix=path) lists no entries (or nothing is
            yielded at all); False if path is not a directory or
            tree.kind() raises NoSuchFile
        """
        # Continue only if path is a directory
        try:
            if tree.kind(path) != 'directory':
                return False
        except bazErrors.NoSuchFile:
            self.warning("Skipping empty_dir detection - no file_id for %s" %
                (path,))
            return False

        # Only the first item of the walk describes path itself, so stop
        # there instead of materialising the walk of the whole subtree.
        for first in tree.walkdirs(prefix=path):
            return len(first[1]) == 0
        # The walk produced nothing, so there are no entries to report.
        return True
316
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
317
    def emit_features(self):
        """Print a FeatureCommand for each name in commands.FEATURE_NAMES.

        The names are emitted in sorted order.
        """
        feature_names = sorted(commands.FEATURE_NAMES)
        for name in feature_names:
            feature_cmd = commands.FeatureCommand(name)
            self.print_cmd(feature_cmd)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
320
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
321
    def emit_baseline(self, revid, ref):
        """Print a commit holding the full tree of revid, using mark 1.

        :param revid: id of the revision to use as the baseline
        :param ref: ref the baseline commit is made on
        """
        # Emit a full source tree of the first commit's parent
        repository = self.branch.repository
        revobj = repository.get_revision(revid)
        mark = 1
        self.revid_to_mark[revid] = mark
        # Diffing against the null revision yields file commands for the
        # complete tree.
        file_cmds = self._get_filecommands(breezy.revision.NULL_REVISION, revid)
        commit_cmd = self._get_commit_command(ref, mark, revobj, file_cmds)
        self.print_cmd(commit_cmd)
0.135.1 by Andy Grimm
Add --baseline option
328
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
329
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
330
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
331
            return
332
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
333
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
334
        try:
335
            revobj = self.branch.repository.get_revision(revid)
336
        except bazErrors.NoSuchRevision:
337
            # This is a ghost revision. Mark it as not found and next!
338
            self.revid_to_mark[revid] = -1
339
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
340
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
341
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
342
        # TODO: Consider the excluded revisions when deciding the parents.
343
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
344
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
345
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
346
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
347
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
348
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
349
            if ncommits:
350
                # This is a parentless commit but it's not the first one
351
                # output. We need to create a new temporary branch for it
352
                # otherwise git-fast-import will assume the previous commit
353
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
354
                ref = self._next_tmp_ref()
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
355
            parent = breezy.revision.NULL_REVISION
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
356
        else:
357
            parent = revobj.parent_ids[0]
358
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
359
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
360
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
361
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
362
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
363
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
364
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
365
        # Report progress and checkpoint if it's time for that
366
        self.report_progress(ncommits)
367
        if (self.checkpoint > 0 and ncommits
368
            and ncommits % self.checkpoint == 0):
369
            self.note("Exported %i commits - adding checkpoint to output"
370
                % ncommits)
371
            self._save_marks()
372
            self.print_cmd(commands.CheckpointCommand())
373
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
374
    def _get_name_email(self, user):
375
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
376
            # If the email isn't inside <>, we need to use it as the name
377
            # in order for things to round-trip correctly.
378
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
379
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
380
            email = ''
381
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
382
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
383
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
384
385
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the CommitCommand for one revision.

        :param git_ref: ref the commit is made on
        :param mark: mark assigned to the commit
        :param revobj: the revision object being exported
        :param file_cmds: iterable of file commands belonging to the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            more_authors = revobj.get_apparent_authors()
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            parent_mark = self.revid_to_mark.get(p)
            if parent_mark is None or parent_mark == -1:
                # ghost - ignore. A ghost is either unknown, or was
                # recorded by emit_commit() with the placeholder mark -1,
                # which must never be written out as a parent reference.
                continue
            non_ghost_parents.append(b":%d" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Build a filtered copy so the revision object's own
            # properties dictionary is left untouched.
            properties = dict(
                (key, value) for key, value in revobj.properties.items()
                if key not in self.properties_to_exclude)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds), more_authors=more_author_info,
            properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
447
448
    def _get_revision_trees(self, parent, revision_id):
        """Load the trees for a parent revision and for the revision itself.

        :param parent: revision id used as the basis of the comparison.
        :param revision_id: revision id being exported.
        :return: a (tree_old, tree_new) tuple. tree_new is None when the
            tree for revision_id raises UnexpectedInventoryFormat.
        """
        repository = self.branch.repository

        try:
            tree_old = repository.revision_tree(parent)
        except bazErrors.UnexpectedInventoryFormat:
            self.warning("Parent is malformed - diffing against previous parent")
            # The parent's own tree is unreadable, so compare against the
            # tree of the parent's first parent instead.
            grandparent_id = repository.get_revision(parent).parent_ids[0]
            tree_old = repository.revision_tree(grandparent_id)

        try:
            tree_new = repository.revision_tree(revision_id)
        except bazErrors.UnexpectedInventoryFormat:
            # Nothing more can be done for this revision; report it and let
            # the caller see None for the new tree.
            self.warning("Revision %s is malformed - skipping" % revision_id)
            tree_new = None

        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
463
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
464
    def _get_filecommands(self, parent, revision_id):
        """Get the list of FileCommands for the changes between two revisions.

        :param parent: revision id whose tree is the basis of the comparison.
        :param revision_id: revision id whose changes are being exported.
        :return: a list of file commands: first the rename and delete
            commands produced by _process_renames_and_deletes(), then one
            FileModifyCommand per added or modified file, symlink or (when
            not in plain format) directory. An empty list is returned when
            either tree is falsy, e.g. the new tree could not be read.
        """
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
        if not(tree_old and tree_new):
            # Something is wrong with this revision - ignore the filecommands
            return []

        changes = tree_new.changes_from(tree_old)

        # Make "modified" have 3-tuples, as added does
        my_modified = [x[0:3] for x in changes.modified]

        # The potential interaction between renames and deletes is messy.
        # Handle it here ...
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
            changes.renamed, changes.removed, revision_id, tree_old)

        # Map kind changes to a delete followed by an add
        for path, id_, kind1, kind2 in changes.kind_changed:
            path = self._adjust_path_for_renames(path, renamed, revision_id)
            # IGC: I don't understand why a delete is needed here.
            # In fact, it seems harmful? If you uncomment this line,
            # please file a bug explaining why you needed to.
            #file_cmds.append(commands.FileDeleteCommand(path))
            my_modified.append((path, id_, kind2))

        # Record modifications
        for path, id_, kind in changes.added + my_modified + rd_modifies:
            if kind == 'file':
                text = tree_new.get_file_text(path, id_)
                file_cmds.append(commands.FileModifyCommand(
                    path.encode("utf-8"),
                    helpers.kind_to_mode(
                        'file', tree_new.is_executable(path, id_)),
                    None, text))
            elif kind == 'symlink':
                # Paths are handed to the command as UTF-8 bytes, so do the
                # same for a symlink target that comes back as text. A
                # target that is already bytes (or None) is passed through
                # untouched.
                # NOTE(review): assumes the command data must be bytes, as
                # for file text - confirm against python-fastimport.
                target = tree_new.get_symlink_target(path, id_)
                if target is not None and not isinstance(target, bytes):
                    target = target.encode("utf-8")
                file_cmds.append(commands.FileModifyCommand(
                    path.encode("utf-8"),
                    helpers.kind_to_mode('symlink', False),
                    None, target))
            elif kind == 'directory':
                # Directory entries are only emitted outside plain format.
                if not self.plain_format:
                    file_cmds.append(
                        commands.FileModifyCommand(
                            path.encode("utf-8"),
                            helpers.kind_to_mode('directory', False), None,
                            None))
            else:
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
                    (path, kind))
        return file_cmds
512
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
513
    def _process_renames_and_deletes(self, renames, deletes,
        revision_id, tree_old):
        """Turn rename and delete changes into correctly ordered commands.

        :param renames: iterable of (oldpath, newpath, id, kind,
            text_modified, meta_modified) tuples.
        :param deletes: sequence of (path, id, kind) tuples; it is walked
            twice.
        :param revision_id: revision being exported; only used in messages.
        :param tree_old: tree the changes are relative to.
        :return: a (file_cmds, modifies, renamed) tuple: the rename/delete
            commands in emission order, (newpath, id, kind) tuples for
            renamed entries whose text or metadata also changed, and
            [oldpath, newpath] pairs for the explicit renames processed.
        """
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
        # Two awkward combinations have to be supported:
        #
        # 1) bzr rm a; bzr mv b a; bzr commit
        # 2) bzr mv x/y z; bzr rm x; commit
        #
        # Case 1 needs the delete emitted before the rename:
        #
        # D a
        # R b a
        #
        # Case 2 needs the rename emitted before the delete:
        #
        # R x/y z
        # D x
        #
        # Neither "all deletes first" nor "all renames first" satisfies
        # both, so the lists are processed in several passes.
        file_cmds = []
        modifies = []
        renamed = []
        explicit_renames = {}
        implied_renames = {}
        pending_deletes = {p for p, _, _ in deletes}

        for (oldpath, newpath, id_, kind,
                text_modified, meta_modified) in renames:
            # Directory commands are suppressed in plain format.
            emit = not (kind == 'directory' and self.plain_format)

            if newpath in pending_deletes:
                # Case 1: the rename target is being deleted, so the delete
                # goes out now, ahead of the rename.
                if emit:
                    file_cmds.append(
                        commands.FileDeleteCommand(newpath.encode("utf-8")))
                pending_deletes.remove(newpath)

            if self.is_empty_dir(tree_old, oldpath):
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
                    revision_id))
                continue

            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
            #    revision_id)
            renamed.append([oldpath, newpath])
            explicit_renames[oldpath] = newpath
            if emit:
                old_bytes = oldpath.encode("utf-8")
                new_bytes = newpath.encode("utf-8")
                file_cmds.append(
                    commands.FileRenameCommand(old_bytes, new_bytes))
            if text_modified or meta_modified:
                modifies.append((newpath, id_, kind))

            # Renaming a directory implies all children must be renamed.
            # Note: changes_from() doesn't handle this
            if kind == 'directory' and tree_old.kind(oldpath, id_) == 'directory':
                children = tree_old.inventory.iter_entries_by_dir(from_dir=id_)
                for child_relpath, entry in children:
                    if not (entry.kind == 'directory' and self.plain_format):
                        implied_renames[
                            osutils.pathjoin(oldpath, child_relpath)] = (
                            osutils.pathjoin(newpath, child_relpath))

        # Emit renames for children that had no explicit rename of their own.
        for old_child_path in sorted(
                set(implied_renames) - set(explicit_renames)):
            new_child_path = implied_renames[old_child_path]
            if self.verbose:
                self.note("implicitly renaming %s => %s" % (old_child_path,
                    new_child_path))
            file_cmds.append(commands.FileRenameCommand(
                old_child_path.encode("utf-8"),
                new_child_path.encode("utf-8")))

        # Emit the deletes that were not already issued ahead of a rename.
        for path, id_, kind in deletes:
            still_pending = path in pending_deletes
            if still_pending and not (kind == 'directory'
                                      and self.plain_format):
                #path = self._adjust_path_for_renames(path, renamed, revision_id)
                file_cmds.append(
                    commands.FileDeleteCommand(path.encode("utf-8")))

        return file_cmds, modifies, renamed
594
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
595
    def _adjust_path_for_renames(self, path, renamed, revision_id):
        """Rewrite path to reflect the given renames.

        Each (old, new) pair is applied in order to the current value of
        path: an exact match is replaced by new, and a path below the old
        directory has its leading "old/" prefix swapped for "new/".

        :param path: the path to adjust.
        :param renamed: iterable of (old, new) path pairs.
        :param revision_id: revision being exported; only used in messages.
        :return: the adjusted path.
        """
        # If a previous rename is found, we should adjust the path
        for old, new in renamed:
            if path == old:
                self.note("Changing path %s given rename to %s in revision %s"
                    % (path, new, revision_id))
                path = new
            elif path.startswith(old + '/'):
                self.note(
                    "Adjusting path %s given rename of %s to %s in revision %s"
                    % (path, old, new, revision_id))
                # Swap the leading prefix only. str.replace() would also
                # rewrite any later "old/" occurrence inside the path
                # (e.g. "a/ba/c" with a -> x must become "x/ba/c").
                path = new + path[len(old):]
        return path
608
0.68.1 by Pieter de Bie
Classify bzr-fast-export
609
    def emit_tags(self):
        """Print a reset command for each tag of the branch.

        Tags whose revision id has no entry in revid_to_mark are skipped
        with a warning. In plain format, a tag whose ref fails
        check_ref_format() is either rewritten with
        sanitize_ref_name_for_git() (when rewrite_tags is set) or skipped,
        with a warning in both cases.
        """
        tag_dict = self.branch.tags.get_tag_dict()
        for tag, revid in viewitems(tag_dict):
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
                continue

            git_ref = b'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                if not self.rewrite_tags:
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.', git_ref)
                    continue
                new_ref = sanitize_ref_name_for_git(git_ref)
                self.warning('tag %r is exported as %r to be valid in git.',
                             git_ref, new_ref)
                git_ref = new_ref

            self.print_cmd(commands.ResetCommand(git_ref, b":%d" % mark))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
629
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
630
    def _next_tmp_ref(self):
        """Return a unique branch ref whose name starts with "tmp".

        The first call yields "refs/heads/tmp"; each later call bumps the
        counter kept in self.branch_names and appends it as a ".N" suffix.
        """
        prefix = 'tmp'
        counters = self.branch_names
        if prefix in counters:
            counters[prefix] += 1
            name = '%s.%d' % (prefix, counters[prefix])
        else:
            # First use: register the prefix and hand it out unsuffixed.
            counters[prefix] = 0
            name = prefix
        return 'refs/heads/%s' % name