/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
44
from __future__ import absolute_import
45
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
46
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
47
# is not updated (because the parent of commit is already merged, so we don't
48
# set new_git_branch to the previously used name)
49
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
50
from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
51
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
52
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
53
import breezy.branch
54
import breezy.revision
55
from ... import (
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
56
    builtins,
57
    errors as bazErrors,
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
58
    lazy_import,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
59
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
60
    progress,
61
    trace,
62
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
63
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
64
from . import (
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
65
    helpers,
66
    marks_file,
67
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
68
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
69
lazy_import.lazy_import(globals(),
70
"""
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
71
from fastimport import commands
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
72
""")
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
73
74
75
def _get_output_stream(destination):
76
    if destination is None or destination == '-':
6628.1.5 by Jelmer Vernooij
Consistently use fastimport feature.
77
        return helpers.binary_stream(sys.stdout)
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
78
    elif destination.endswith('gz'):
79
        import gzip
80
        return gzip.open(destination, 'wb')
81
    else:
82
        return open(destination, 'wb')
83
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
84
# from dulwich.repo:
85
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed
    # separately to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    # Control characters (below 0o40, i.e. space) and a handful of special
    # characters are forbidden. The literal uses the 0o prefix so that it
    # parses on both Python 2.6+ and Python 3 (a bare 040 is a syntax error
    # on Python 3).
    for c in refname:
        if ord(c) < 0o40 or c in '\177 ~^:?*[':
            return False
    # refname is known to be non-empty here: it contains at least one '/'.
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
115
0.133.3 by Oleksandr Usov
Implement comments from patch review:
116
117
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format rejects is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are unique.

    :param refname: refname to rewrite
    :return: new refname
    """
    return re.sub(
        # '/.' in refname or startswith '.'
        r"/\.|^\."
        # '..' in refname
        r"|\.\."
        # ord(c) < 0o40, i.e. any control character
        r"|[\x00-\x1f]"
        # c in '\177 ~^:?*['
        r"|[\x7f ~^:?*\[]"
        # last char in "/."
        r"|[/.]$"
        # endswith '.lock'. The dot is escaped so that valid names which
        # merely end in "lock" (e.g. "block", "unlock") are left untouched.
        r"|\.lock$"
        # "@{" in refname
        r"|@\{"
        # "\\" in refname
        r"|\\",
        "_", refname)
0.64.173 by Ian Clatworthy
add -r option to fast-export
147
0.64.339 by Jelmer Vernooij
Some refactoring of exporter.
148
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
149
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
150
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
151
    def __init__(self, source, outf, ref=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False,
        no_tags=False, baseline=False):
        """Export branch data in fast import format.

        :param source: the branch to export
        :param outf: stream the generated commands are written to
        :param ref: ref handed to every emitted commit
        :param checkpoint: when greater than zero, a checkpoint is added to
            the output every that many commits
        :param import_marks_file: marks file to load existing marks from
        :param export_marks_file: marks file to save the marks to
        :param revision: optional revision range restricting the export
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        :param no_tags: if True tags won't be exported at all
        :param baseline: if True and a starting revision is given, that
            revision is exported first as a complete tree
        """
        # What is exported and where it goes
        self.branch = source
        self.outf = outf
        self.ref = ref
        self.revision = revision
        self.excluded_revisions = set()
        self.baseline = baseline

        # Output format switches
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        self.no_tags = no_tags
        self._multi_author_api_available = hasattr(
            breezy.revision.Revision, 'get_apparent_authors')
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 100 if verbose else 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Marks: start empty, then seed from the imported marks file (if any)
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.revid_to_mark = {}
        # Branch names are no longer included in the marks file, so this
        # mapping always starts out empty.
        self.branch_names = {}
        if not self.import_marks_file:
            return
        marks_info = marks_file.import_marks(self.import_marks_file)
        if marks_info is None:
            return
        # The marks file maps mark -> revision id; we look up the other way.
        self.revid_to_mark = dict(
            (revid, mark) for mark, revid in marks_info.items())
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
202
0.64.173 by Ian Clatworthy
add -r option to fast-export
203
    def interesting_history(self):
204
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
205
            rev1, rev2 = builtins._get_revision_range(self.revision,
206
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
207
            start_rev_id = rev1.rev_id
208
            end_rev_id = rev2.rev_id
209
        else:
210
            start_rev_id = None
211
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
212
        self.note("Calculating the revisions to include ...")
0.64.341 by Jelmer Vernooij
Fix test, clarify help description for 'bzr fast-export'.
213
        view_revisions = [rev_id for rev_id, _, _, _ in
214
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
215
        view_revisions.reverse()
0.64.173 by Ian Clatworthy
add -r option to fast-export
216
        # If a starting point was given, we need to later check that we don't
217
        # start emitting revisions from before that point. Collect the
218
        # revisions to exclude now ...
219
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
220
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
221
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
222
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.135.2 by Andy Grimm
fix --baseline bugs, and add a couple of tests
223
            if self.baseline:
224
                # needed so the first relative commit knows its parent
225
                self.excluded_revisions.remove(start_rev_id)
226
                view_revisions.insert(0, start_rev_id)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
227
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
228
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
229
    def run(self):
230
        # Export the data
6754.8.4 by Jelmer Vernooij
Use new context stuff.
231
        with self.branch.repository.lock_read():
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
232
            interesting = self.interesting_history()
0.102.15 by Ian Clatworthy
add revision count to 'Starting export ...' message
233
            self._commit_total = len(interesting)
234
            self.note("Starting export of %d revisions ..." %
235
                self._commit_total)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
236
            if not self.plain_format:
237
                self.emit_features()
0.135.1 by Andy Grimm
Add --baseline option
238
            if self.baseline:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
239
                self.emit_baseline(interesting.pop(0), self.ref)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
240
            for revid in interesting:
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
241
                self.emit_commit(revid, self.ref)
0.138.1 by Oleksandr Usov
Add --no-tags flag
242
            if self.branch.supports_tags() and not self.no_tags:
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
243
                self.emit_tags()
0.68.1 by Pieter de Bie
Classify bzr-fast-export
244
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
245
        # Save the marks if requested
0.79.2 by Ian Clatworthy
extend & use marks_file API
246
        self._save_marks()
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
247
        self.dump_stats()
248
249
    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message; any ``args`` are passed on to trace.note
            together with it
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
253
254
    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message; any ``args`` are passed on to
            trace.warning together with it
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
258
259
    def _time_of_day(self):
260
        """Time of day as a string."""
261
        # Note: this is a separate method so tests can patch in a fixed value
262
        return time.strftime("%H:%M:%S")
263
264
    def report_progress(self, commit_count, details=''):
265
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
266
            if self._commit_total:
267
                counts = "%d/%d" % (commit_count, self._commit_total)
268
            else:
269
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
270
            minutes = (time.time() - self._start_time) / 60
271
            rate = commit_count * 1.0 / minutes
272
            if rate > 10:
273
                rate_str = "at %.0f/minute " % rate
274
            else:
275
                rate_str = "at %.1f/minute " % rate
276
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
277
278
    def dump_stats(self):
        """Note how many revisions have a mark and how long the run took."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        # The count is the number of entries in the revid -> mark table
        rc = len(self.revid_to_mark)
        noun = helpers.single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
284
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
285
    def print_cmd(self, cmd):
286
        self.outf.write("%r\n" % cmd)
287
0.79.2 by Ian Clatworthy
extend & use marks_file API
288
    def _save_marks(self):
289
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
290
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
291
            marks_file.export_marks(self.export_marks_file, revision_ids)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
292
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
293
    def is_empty_dir(self, tree, path):
294
        path_id = tree.path2id(path)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
295
        if path_id is None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
296
            self.warning("Skipping empty_dir detection - no file_id for %s" %
297
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
298
            return False
299
300
        # Continue if path is not a directory
301
        if tree.kind(path_id) != 'directory':
302
            return False
303
304
        # Use treewalk to find the contents of our directory
305
        contents = list(tree.walkdirs(prefix=path))[0]
306
        if len(contents[1]) == 0:
307
            return True
308
        else:
309
            return False
310
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
311
    def emit_features(self):
        """Print a feature command for each known feature name, sorted."""
        feature_names = sorted(commands.FEATURE_NAMES)
        for name in feature_names:
            self.print_cmd(commands.FeatureCommand(name))
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
314
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
315
    def emit_baseline(self, revid, ref):
        """Emit revid as a commit carrying its complete tree.

        The file commands are computed against the null revision and the
        commit is always given mark 1.

        :param revid: id of the revision to emit
        :param ref: ref the commit is made on
        """
        # Emit a full source tree of the first commit's parent
        baseline_mark = 1
        revobj = self.branch.repository.get_revision(revid)
        self.revid_to_mark[revid] = baseline_mark
        file_cmds = self._get_filecommands(
            breezy.revision.NULL_REVISION, revid)
        commit = self._get_commit_command(ref, baseline_mark, revobj,
            file_cmds)
        self.print_cmd(commit)
0.135.1 by Andy Grimm
Add --baseline option
322
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
323
    def emit_commit(self, revid, ref):
0.64.173 by Ian Clatworthy
add -r option to fast-export
324
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
325
            return
326
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
327
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
328
        try:
329
            revobj = self.branch.repository.get_revision(revid)
330
        except bazErrors.NoSuchRevision:
331
            # This is a ghost revision. Mark it as not found and next!
332
            self.revid_to_mark[revid] = -1
333
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
334
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
335
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
336
        # TODO: Consider the excluded revisions when deciding the parents.
337
        # Currently, a commit with parents that are excluded ought to be
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
338
        # triggering the ref calculation below (and it is not).
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
339
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
340
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
341
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
342
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
343
            if ncommits:
344
                # This is a parentless commit but it's not the first one
345
                # output. We need to create a new temporary branch for it
346
                # otherwise git-fast-import will assume the previous commit
347
                # was this one's parent
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
348
                ref = self._next_tmp_ref()
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
349
            parent = breezy.revision.NULL_REVISION
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
350
        else:
351
            parent = revobj.parent_ids[0]
352
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
353
        # Print the commit
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
354
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
355
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
356
        file_cmds = self._get_filecommands(parent, revid)
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
357
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
358
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
359
        # Report progress and checkpoint if it's time for that
360
        self.report_progress(ncommits)
361
        if (self.checkpoint > 0 and ncommits
362
            and ncommits % self.checkpoint == 0):
363
            self.note("Exported %i commits - adding checkpoint to output"
364
                % ncommits)
365
            self._save_marks()
366
            self.print_cmd(commands.CheckpointCommand())
367
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
368
    def _get_name_email(self, user):
369
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
370
            # If the email isn't inside <>, we need to use it as the name
371
            # in order for things to round-trip correctly.
372
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
373
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
374
            email = ''
375
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
376
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
377
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
378
379
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
380
        # Get the committer and author info
381
        committer = revobj.committer
382
        name, email = self._get_name_email(committer)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
383
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
384
        if self._multi_author_api_available:
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
385
            more_authors = revobj.get_apparent_authors()
386
            author = more_authors.pop(0)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
387
        else:
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
388
            more_authors = []
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
389
            author = revobj.get_apparent_author()
0.64.291 by Jelmer Vernooij
In plain mode, don't export multiple authors.
390
        if not self.plain_format and more_authors:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
391
            name, email = self._get_name_email(author)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
392
            author_info = (name, email, revobj.timestamp, revobj.timezone)
393
            more_author_info = []
394
            for a in more_authors:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
395
                name, email = self._get_name_email(a)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
396
                more_author_info.append(
397
                    (name, email, revobj.timestamp, revobj.timezone))
398
        elif author != committer:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
399
            name, email = self._get_name_email(author)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
400
            author_info = (name, email, revobj.timestamp, revobj.timezone)
401
            more_author_info = None
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
402
        else:
403
            author_info = None
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
404
            more_author_info = None
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
405
406
        # Get the parents in terms of marks
407
        non_ghost_parents = []
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
408
        for p in revobj.parent_ids:
0.64.173 by Ian Clatworthy
add -r option to fast-export
409
            if p in self.excluded_revisions:
410
                continue
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
411
            try:
412
                parent_mark = self.revid_to_mark[p]
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
413
                non_ghost_parents.append(":%s" % parent_mark)
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
414
            except KeyError:
415
                # ghost - ignore
416
                continue
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
417
        if non_ghost_parents:
418
            from_ = non_ghost_parents[0]
419
            merges = non_ghost_parents[1:]
420
        else:
421
            from_ = None
422
            merges = None
423
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
424
        # Filter the revision properties. Some metadata (like the
425
        # author information) is already exposed in other ways so
426
        # don't repeat it here.
427
        if self.plain_format:
428
            properties = None
429
        else:
430
            properties = revobj.properties
431
            for prop in self.properties_to_exclude:
432
                try:
433
                    del properties[prop]
434
                except KeyError:
435
                    pass
436
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
437
        # Build and return the result
0.64.361 by Jelmer Vernooij
Fix compatibility with newer versions of python-fastimport.
438
        return commands.CommitCommand(git_ref, str(mark), author_info,
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
439
            committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds),
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
440
            more_authors=more_author_info, properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
441
442
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
443
        try:
444
            tree_old = self.branch.repository.revision_tree(parent)
445
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
446
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
447
            # We can't find the old parent. Let's diff against his parent
448
            pp = self.branch.repository.get_revision(parent)
449
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
450
        tree_new = None
451
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
452
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
453
        except bazErrors.UnexpectedInventoryFormat:
454
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
455
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
456
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
457
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
458
    def _get_filecommands(self, parent, revision_id):
459
        """Get the list of FileCommands for the changes between two revisions."""
460
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
461
        if not(tree_old and tree_new):
462
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
463
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
464
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
465
        changes = tree_new.changes_from(tree_old)
466
467
        # Make "modified" have 3-tuples, as added does
468
        my_modified = [ x[0:3] for x in changes.modified ]
469
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
470
        # The potential interaction between renames and deletes is messy.
471
        # Handle it here ...
472
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
473
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
474
475
        # Map kind changes to a delete followed by an add
476
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
477
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
478
            # IGC: I don't understand why a delete is needed here.
479
            # In fact, it seems harmful? If you uncomment this line,
480
            # please file a bug explaining why you needed to.
481
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
482
            my_modified.append((path, id_, kind2))
483
484
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
485
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
486
            if kind == 'file':
487
                text = tree_new.get_file_text(id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
488
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
489
                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
490
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
491
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
492
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
493
                    helpers.kind_to_mode('symlink', False),
494
                    None, tree_new.get_symlink_target(id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
495
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
496
                if not self.plain_format:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
497
                    file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
498
                        helpers.kind_to_mode('directory', False),
499
                        None, None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
500
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
501
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
502
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
503
        return file_cmds
504
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
505
    def _process_renames_and_deletes(self, renames, deletes,
506
        revision_id, tree_old):
507
        file_cmds = []
508
        modifies = []
509
        renamed = []
510
511
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
512
        # In a nutshell, there are several nasty cases:
513
        #
514
        # 1) bzr rm a; bzr mv b a; bzr commit
515
        # 2) bzr mv x/y z; bzr rm x; commmit
516
        #
517
        # The first must come out with the delete first like this:
518
        #
519
        # D a
520
        # R b a
521
        #
522
        # The second case must come out with the rename first like this:
523
        #
524
        # R x/y z
525
        # D x
526
        #
527
        # So outputting all deletes first or all renames first won't work.
528
        # Instead, we need to make multiple passes over the various lists to
529
        # get the ordering right.
530
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
531
        must_be_renamed = {}
532
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
533
        deleted_paths = set([p for p, _, _ in deletes])
534
        for (oldpath, newpath, id_, kind,
535
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
536
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
537
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
538
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
539
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
540
                deleted_paths.remove(newpath)
541
            if (self.is_empty_dir(tree_old, oldpath)):
542
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
543
                    revision_id))
544
                continue
545
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
546
            #    revision_id)
547
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
548
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
549
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
550
                file_cmds.append(
551
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
552
            if text_modified or meta_modified:
553
                modifies.append((newpath, id_, kind))
554
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
555
            # Renaming a directory implies all children must be renamed.
556
            # Note: changes_from() doesn't handle this
0.64.329 by Jelmer Vernooij
Support exporting files that are changed into directories.
557
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
558
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
559
                    if e.kind == 'directory' and self.plain_format:
560
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
561
                    old_child_path = osutils.pathjoin(oldpath, p)
562
                    new_child_path = osutils.pathjoin(newpath, p)
563
                    must_be_renamed[old_child_path] = new_child_path
564
565
        # Add children not already renamed
566
        if must_be_renamed:
567
            renamed_already = set(old_to_new.keys())
568
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
569
            for old_child_path in sorted(still_to_be_renamed):
570
                new_child_path = must_be_renamed[old_child_path]
571
                if self.verbose:
572
                    self.note("implicitly renaming %s => %s" % (old_child_path,
573
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
574
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
575
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
576
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
577
        # Record remaining deletes
578
        for path, id_, kind in deletes:
579
            if path not in deleted_paths:
580
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
581
            if kind == 'directory' and self.plain_format:
582
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
583
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
584
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
585
        return file_cmds, modifies, renamed
586
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
587
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
588
        # If a previous rename is found, we should adjust the path
589
        for old, new in renamed:
590
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
591
                self.note("Changing path %s given rename to %s in revision %s"
592
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
593
                path = new
594
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
595
                self.note(
596
                    "Adjusting path %s given rename of %s to %s in revision %s"
597
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
598
                path = path.replace(old + "/", new + "/")
599
        return path
600
0.68.1 by Pieter de Bie
Classify bzr-fast-export
601
    def emit_tags(self):
        """Print a reset command for each tag that points at a marked revision.

        Tags whose revision has no entry in ``self.revid_to_mark`` are
        skipped with a warning. In plain format, a tag ref that fails
        ``check_ref_format`` is rewritten via ``sanitize_ref_name_for_git``
        when ``self.rewrite_tags`` is set, and skipped with a warning
        otherwise.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
                continue

            ref = 'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(ref):
                if not self.rewrite_tags:
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.', ref)
                    continue
                sanitized = sanitize_ref_name_for_git(ref)
                self.warning('tag %r is exported as %r to be valid in git.',
                             ref, sanitized)
                ref = sanitized

            self.print_cmd(commands.ResetCommand(ref, ":" + str(mark)))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
621
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
622
    def _next_tmp_ref(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
623
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
624
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
625
        if prefix not in self.branch_names:
626
            self.branch_names[prefix] = 0
627
        else:
628
            self.branch_names[prefix] += 1
629
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
0.64.350 by Jelmer Vernooij
Rename git_branch to ref.
630
        return 'refs/heads/%s' % prefix