/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
44
from __future__ import absolute_import
45
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
46
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
47
# is not updated (because the parent of commit is already merged, so we don't
48
# set new_git_branch to the previously used name)
49
6791.2.4 by Jelmer Vernooij
Fix python3isms.
50
try:
51
    from email.utils import parseaddr
52
except ImportError:  # python < 3
53
    from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
54
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
55
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
56
import breezy.branch
57
import breezy.revision
58
from ... import (
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
59
    builtins,
60
    errors as bazErrors,
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
61
    lazy_import,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
62
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
63
    progress,
64
    trace,
65
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
66
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
67
from . import (
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
68
    helpers,
69
    marks_file,
70
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
71
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
72
lazy_import.lazy_import(globals(),
73
"""
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
74
from fastimport import commands
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
75
""")
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
76
77
78
def _get_output_stream(destination):
79
    if destination is None or destination == '-':
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
80
        return helpers.binary_stream(sys.stdout)
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
81
    elif destination.endswith('gz'):
82
        import gzip
83
        return gzip.open(destination, 'wb')
84
    else:
85
        return open(destination, 'wb')
86
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
87
# from dulwich.repo:
88
def check_ref_format(refname):
    """Tell whether a refname is acceptable to git.

    The checks mirror the rules of git-check-ref-format[1] and are kept
    in the same order as that document.

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # No path component may start with a dot.
    if refname.startswith('.') or '/.' in refname:
        return False
    # At least one '/' is required (this also rejects the empty string).
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    # No control characters, DEL, space or any of ~ ^ : ? * [
    if any(ord(ch) < 0o40 or ch in '\177 ~^:?*[' for ch in refname):
        return False
    # refname is non-empty here, so endswith() is equivalent to testing
    # the last character.
    if refname.endswith(('/', '.')):
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname or '\\' in refname:
        return False
    return True
118
0.133.3 by Oleksandr Usov
Implement comments from patch review:
119
120
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format rejects is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are unique.

    :param refname: refname to rewrite
    :return: new refname
    """
    new_refname = re.sub(
        # '/.' in refname or startswith '.'
        r"/\.|^\."
        # '..' in refname
        r"|\.\."
        # ord(c) < 040
        r"|[\x00-\x1f]"
        # c in '\177 ~^:?*[' ('[' is escaped so that re does not warn
        # about a possible nested set)
        r"|[\177 ~^:?*\[]"
        # last char in "/."
        r"|[/.]$"
        # endswith '.lock'; the dot must be escaped, otherwise names that
        # merely end in "<any char>lock" (e.g. "unlock") are mangled
        r"|\.lock$"
        # "@{" in refname
        r"|@\{"
        # "\\" in refname
        r"|\\",
        "_", refname)
    return new_refname
0.64.173 by Ian Clatworthy
add -r option to fast-export
150
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
151
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
152
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
153
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
154
    def __init__(self, source, outf, ref=None, checkpoint=-1,
                 import_marks_file=None, export_marks_file=None, revision=None,
                 verbose=False, plain_format=False, rewrite_tags=False,
                 no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export (kept as ``self.branch``)
        :param outf: the stream commands are written to
        :param ref: the ref handed to every emitted commit
        :param checkpoint: when greater than zero, a checkpoint command is
            emitted (and the marks saved) every ``checkpoint`` commits
        :param import_marks_file: if set, marks are loaded from this file
        :param export_marks_file: if set, marks are saved to this file
        :param revision: optional revision range restricting the export
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True and a revision range is given, the start
            revision is emitted first as a baseline commit
        """
        # What to export and where to write it
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.revision = revision
        self.excluded_revisions = set()

        # Output options
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self.baseline = baseline
        self._multi_author_api_available = hasattr(
            breezy.revision.Revision, 'get_apparent_authors')
        # Author data is exported through dedicated fields, so these
        # revision properties are dropped from the generic property list.
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        self.branch_names = {}
        if self.import_marks_file:
            marks_info = marks_file.import_marks(self.import_marks_file)
            if marks_info is not None:
                # The file maps mark -> revision id; we need the inverse.
                self.revid_to_mark = {
                    revid: mark for mark, revid in marks_info.items()}
                # These are no longer included in the marks file
                #self.branch_names = marks_info[1]
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
205
0.64.173 by Ian Clatworthy
add -r option to fast-export
206
    def interesting_history(self):
207
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
208
            rev1, rev2 = builtins._get_revision_range(self.revision,
209
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
210
            start_rev_id = rev1.rev_id
211
            end_rev_id = rev2.rev_id
212
        else:
213
            start_rev_id = None
214
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
215
        self.note("Calculating the revisions to include ...")
0.64.341 by Jelmer Vernooij
Fix test, clarify help description for 'bzr fast-export'.
216
        view_revisions = [rev_id for rev_id, _, _, _ in
217
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
218
        view_revisions.reverse()
0.64.173 by Ian Clatworthy
add -r option to fast-export
219
        # If a starting point was given, we need to later check that we don't
220
        # start emitting revisions from before that point. Collect the
221
        # revisions to exclude now ...
222
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
223
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
224
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
225
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.135.2 by Andy Grimm
fix --baseline bugs, and add a couple of tests
226
            if self.baseline:
227
                # needed so the first relative commit knows its parent
228
                self.excluded_revisions.remove(start_rev_id)
229
                view_revisions.insert(0, start_rev_id)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
230
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
231
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
232
    def run(self):
233
        # Export the data
6754.8.4 by Jelmer Vernooij
Use new context stuff.
234
        with self.branch.repository.lock_read():
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
235
            interesting = self.interesting_history()
0.102.15 by Ian Clatworthy
add revision count to 'Starting export ...' message
236
            self._commit_total = len(interesting)
237
            self.note("Starting export of %d revisions ..." %
238
                self._commit_total)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
239
            if not self.plain_format:
240
                self.emit_features()
0.135.1 by Andy Grimm
Add --baseline option
241
            if self.baseline:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
242
                self.emit_baseline(interesting.pop(0), self.ref)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
243
            for revid in interesting:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
244
                self.emit_commit(revid, self.ref)
0.138.1 by Oleksandr Usov
Add --no-tags flag
245
            if self.branch.supports_tags() and not self.no_tags:
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
246
                self.emit_tags()
0.68.1 by Pieter de Bie
Classify bzr-fast-export
247
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
248
        # Save the marks if requested
0.79.2 by Ian Clatworthy
extend & use marks_file API
249
        self._save_marks()
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
250
        self.dump_stats()
251
252
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message (may contain %-style placeholders)
        :param args: values passed through to trace.note for formatting
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
256
257
    def warning(self, msg, *args):
        """Emit a warning prefixed with the current time of day.

        :param msg: the message (may contain %-style placeholders)
        :param args: values passed through to trace.warning for formatting
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
261
262
    def _time_of_day(self):
263
        """Time of day as a string."""
264
        # Note: this is a separate method so tests can patch in a fixed value
265
        return time.strftime("%H:%M:%S")
266
267
    def report_progress(self, commit_count, details=''):
268
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
269
            if self._commit_total:
270
                counts = "%d/%d" % (commit_count, self._commit_total)
271
            else:
272
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
273
            minutes = (time.time() - self._start_time) / 60
274
            rate = commit_count * 1.0 / minutes
275
            if rate > 10:
276
                rate_str = "at %.0f/minute " % rate
277
            else:
278
                rate_str = "at %.1f/minute " % rate
279
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
280
281
    def dump_stats(self):
        """Log how many revisions were recorded and how long it took."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        # Every entry in revid_to_mark counts as one exported revision.
        rc = len(self.revid_to_mark)
        noun = helpers.single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
287
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
288
    def print_cmd(self, cmd):
289
        self.outf.write("%r\n" % cmd)
290
0.79.2 by Ian Clatworthy
extend & use marks_file API
291
    def _save_marks(self):
292
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
293
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
294
            marks_file.export_marks(self.export_marks_file, revision_ids)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
295
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
296
    def is_empty_dir(self, tree, path):
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
297
        # Continue if path is not a directory
298
        try:
299
            if tree.kind(path) != 'directory':
300
                return False
301
        except bazErrors.NoSuchFile:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
302
            self.warning("Skipping empty_dir detection - no file_id for %s" %
303
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
304
            return False
305
306
        # Use treewalk to find the contents of our directory
307
        contents = list(tree.walkdirs(prefix=path))[0]
308
        if len(contents[1]) == 0:
309
            return True
310
        else:
311
            return False
312
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
313
    def emit_features(self):
        """Write one feature command per known feature name, in sorted order."""
        feature_names = sorted(commands.FEATURE_NAMES)
        for name in feature_names:
            feature_cmd = commands.FeatureCommand(name)
            self.print_cmd(feature_cmd)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
316
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
317
    def emit_baseline(self, revid, ref):
        """Emit revid as a commit carrying its complete source tree.

        The file commands are computed against the null revision, so the
        commit holds the full tree of the first commit's parent rather
        than a delta. The baseline always receives mark 1.

        :param revid: id of the revision to use as the baseline
        :param ref: the ref the commit is emitted on
        """
        baseline_mark = 1
        revobj = self.branch.repository.get_revision(revid)
        self.revid_to_mark[revid] = baseline_mark
        file_cmds = self._get_filecommands(
            breezy.revision.NULL_REVISION, revid)
        commit_cmd = self._get_commit_command(
            ref, baseline_mark, revobj, file_cmds)
        self.print_cmd(commit_cmd)
0.135.1 by Andy Grimm
Add --baseline option
324
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
325
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
326
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
327
            return
328
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
329
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
330
        try:
331
            revobj = self.branch.repository.get_revision(revid)
332
        except bazErrors.NoSuchRevision:
333
            # This is a ghost revision. Mark it as not found and next!
334
            self.revid_to_mark[revid] = -1
335
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
336
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
337
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
338
        # TODO: Consider the excluded revisions when deciding the parents.
339
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
340
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
341
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
342
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
343
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
344
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
345
            if ncommits:
346
                # This is a parentless commit but it's not the first one
347
                # output. We need to create a new temporary branch for it
348
                # otherwise git-fast-import will assume the previous commit
349
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
350
                ref = self._next_tmp_ref()
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
351
            parent = breezy.revision.NULL_REVISION
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
352
        else:
353
            parent = revobj.parent_ids[0]
354
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
355
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
356
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
357
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
358
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
359
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
360
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
361
        # Report progress and checkpoint if it's time for that
362
        self.report_progress(ncommits)
363
        if (self.checkpoint > 0 and ncommits
364
            and ncommits % self.checkpoint == 0):
365
            self.note("Exported %i commits - adding checkpoint to output"
366
                % ncommits)
367
            self._save_marks()
368
            self.print_cmd(commands.CheckpointCommand())
369
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
370
    def _get_name_email(self, user):
371
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
372
            # If the email isn't inside <>, we need to use it as the name
373
            # in order for things to round-trip correctly.
374
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
375
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
376
            email = ''
377
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
378
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
379
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
380
381
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
382
        # Get the committer and author info
383
        committer = revobj.committer
384
        name, email = self._get_name_email(committer)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
385
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
386
        if self._multi_author_api_available:
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
387
            more_authors = revobj.get_apparent_authors()
388
            author = more_authors.pop(0)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
389
        else:
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
390
            more_authors = []
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
391
            author = revobj.get_apparent_author()
0.64.291 by Jelmer Vernooij
In plain mode, don't export multiple authors.
392
        if not self.plain_format and more_authors:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
393
            name, email = self._get_name_email(author)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
394
            author_info = (name, email, revobj.timestamp, revobj.timezone)
395
            more_author_info = []
396
            for a in more_authors:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
397
                name, email = self._get_name_email(a)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
398
                more_author_info.append(
399
                    (name, email, revobj.timestamp, revobj.timezone))
400
        elif author != committer:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
401
            name, email = self._get_name_email(author)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
402
            author_info = (name, email, revobj.timestamp, revobj.timezone)
403
            more_author_info = None
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
404
        else:
405
            author_info = None
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
406
            more_author_info = None
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
407
408
        # Get the parents in terms of marks
409
        non_ghost_parents = []
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
410
        for p in revobj.parent_ids:
0.64.173 by Ian Clatworthy
add -r option to fast-export
411
            if p in self.excluded_revisions:
412
                continue
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
413
            try:
414
                parent_mark = self.revid_to_mark[p]
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
415
                non_ghost_parents.append(":%s" % parent_mark)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
416
            except KeyError:
417
                # ghost - ignore
418
                continue
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
419
        if non_ghost_parents:
420
            from_ = non_ghost_parents[0]
421
            merges = non_ghost_parents[1:]
422
        else:
423
            from_ = None
424
            merges = None
425
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
426
        # Filter the revision properties. Some metadata (like the
427
        # author information) is already exposed in other ways so
428
        # don't repeat it here.
429
        if self.plain_format:
430
            properties = None
431
        else:
432
            properties = revobj.properties
433
            for prop in self.properties_to_exclude:
434
                try:
435
                    del properties[prop]
436
                except KeyError:
437
                    pass
438
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
439
        # Build and return the result
0.64.361 by Jelmer Vernooij
Fix compatibility with newer versions of python-fastimport.
440
        return commands.CommitCommand(git_ref, str(mark), author_info,
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
441
            committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds),
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
442
            more_authors=more_author_info, properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
443
444
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
445
        try:
446
            tree_old = self.branch.repository.revision_tree(parent)
447
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
448
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
449
            # We can't find the old parent. Let's diff against his parent
450
            pp = self.branch.repository.get_revision(parent)
451
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
452
        tree_new = None
453
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
454
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
455
        except bazErrors.UnexpectedInventoryFormat:
456
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
457
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
458
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
459
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
460
    def _get_filecommands(self, parent, revision_id):
461
        """Get the list of FileCommands for the changes between two revisions."""
462
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
463
        if not(tree_old and tree_new):
464
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
465
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
466
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
467
        changes = tree_new.changes_from(tree_old)
468
469
        # Make "modified" have 3-tuples, as added does
470
        my_modified = [ x[0:3] for x in changes.modified ]
471
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
472
        # The potential interaction between renames and deletes is messy.
473
        # Handle it here ...
474
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
475
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
476
477
        # Map kind changes to a delete followed by an add
478
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
479
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
480
            # IGC: I don't understand why a delete is needed here.
481
            # In fact, it seems harmful? If you uncomment this line,
482
            # please file a bug explaining why you needed to.
483
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
484
            my_modified.append((path, id_, kind2))
485
486
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
487
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
488
            if kind == 'file':
6809.4.5 by Jelmer Vernooij
Swap arguments for get_file_*.
489
                text = tree_new.get_file_text(path, id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
490
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
6809.4.21 by Jelmer Vernooij
Fix long lines.
491
                    helpers.kind_to_mode(
492
                        'file', tree_new.is_executable(path, id_)),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
493
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
494
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
495
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
496
                    helpers.kind_to_mode('symlink', False),
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
497
                    None, tree_new.get_symlink_target(path, id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
498
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
499
                if not self.plain_format:
6809.4.21 by Jelmer Vernooij
Fix long lines.
500
                    file_cmds.append(
501
                            commands.FileModifyCommand(path.encode("utf-8"),
502
                                helpers.kind_to_mode('directory', False), None,
503
                                None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
504
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
505
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
506
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
507
        return file_cmds
508
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
509
    def _process_renames_and_deletes(self, renames, deletes,
510
        revision_id, tree_old):
511
        file_cmds = []
512
        modifies = []
513
        renamed = []
514
515
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
516
        # In a nutshell, there are several nasty cases:
517
        #
518
        # 1) bzr rm a; bzr mv b a; bzr commit
519
        # 2) bzr mv x/y z; bzr rm x; commmit
520
        #
521
        # The first must come out with the delete first like this:
522
        #
523
        # D a
524
        # R b a
525
        #
526
        # The second case must come out with the rename first like this:
527
        #
528
        # R x/y z
529
        # D x
530
        #
531
        # So outputting all deletes first or all renames first won't work.
532
        # Instead, we need to make multiple passes over the various lists to
533
        # get the ordering right.
534
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
535
        must_be_renamed = {}
536
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
537
        deleted_paths = set([p for p, _, _ in deletes])
538
        for (oldpath, newpath, id_, kind,
539
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
540
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
541
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
542
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
543
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
544
                deleted_paths.remove(newpath)
545
            if (self.is_empty_dir(tree_old, oldpath)):
546
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
547
                    revision_id))
548
                continue
549
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
550
            #    revision_id)
551
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
552
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
553
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
554
                file_cmds.append(
555
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
556
            if text_modified or meta_modified:
557
                modifies.append((newpath, id_, kind))
558
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
559
            # Renaming a directory implies all children must be renamed.
560
            # Note: changes_from() doesn't handle this
6809.4.7 by Jelmer Vernooij
Swap arguments for get_symlink_target and kind/stored_kind.
561
            if kind == 'directory' and tree_old.kind(oldpath, id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
562
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
563
                    if e.kind == 'directory' and self.plain_format:
564
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
565
                    old_child_path = osutils.pathjoin(oldpath, p)
566
                    new_child_path = osutils.pathjoin(newpath, p)
567
                    must_be_renamed[old_child_path] = new_child_path
568
569
        # Add children not already renamed
570
        if must_be_renamed:
571
            renamed_already = set(old_to_new.keys())
572
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
573
            for old_child_path in sorted(still_to_be_renamed):
574
                new_child_path = must_be_renamed[old_child_path]
575
                if self.verbose:
576
                    self.note("implicitly renaming %s => %s" % (old_child_path,
577
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
578
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
579
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
580
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
581
        # Record remaining deletes
582
        for path, id_, kind in deletes:
583
            if path not in deleted_paths:
584
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
585
            if kind == 'directory' and self.plain_format:
586
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
587
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
588
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
589
        return file_cmds, modifies, renamed
590
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
591
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
592
        # If a previous rename is found, we should adjust the path
593
        for old, new in renamed:
594
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
595
                self.note("Changing path %s given rename to %s in revision %s"
596
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
597
                path = new
598
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
599
                self.note(
600
                    "Adjusting path %s given rename of %s to %s in revision %s"
601
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
602
                path = path.replace(old + "/", new + "/")
603
        return path
604
0.68.1 by Pieter de Bie
Classify bzr-fast-export
605
    def emit_tags(self):
        """Print a reset command for each branch tag that can be exported.

        A tag is skipped, with a warning, when its revision has no entry in
        ``self.revid_to_mark``. In plain format a tag whose ref name fails
        ``check_ref_format`` is either rewritten via
        ``sanitize_ref_name_for_git`` (when ``self.rewrite_tags`` is set) or
        skipped with a warning.
        """
        tag_dict = self.branch.tags.get_tag_dict()
        for tag, revid in tag_dict.items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
                continue

            git_ref = 'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                if not self.rewrite_tags:
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.', git_ref)
                    continue
                new_ref = sanitize_ref_name_for_git(git_ref)
                self.warning('tag %r is exported as %r to be valid in git.',
                             git_ref, new_ref)
                git_ref = new_ref

            reset = commands.ResetCommand(git_ref, ":" + str(mark))
            self.print_cmd(reset)
0.68.1 by Pieter de Bie
Classify bzr-fast-export
625
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
626
    def _next_tmp_ref(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
627
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
628
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
629
        if prefix not in self.branch_names:
630
            self.branch_names[prefix] = 0
631
        else:
632
            self.branch_names[prefix] += 1
633
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
634
        return 'refs/heads/%s' % prefix