/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
44
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
45
# is not updated (because the parent of commit is already merged, so we don't
46
# set new_git_branch to the previously used name)
47
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
48
from email.Utils import parseaddr
0.64.173 by Ian Clatworthy
add -r option to fast-export
49
import sys, time
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
50
51
import bzrlib.branch
52
import bzrlib.revision
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
53
from bzrlib import (
54
    builtins,
55
    errors as bazErrors,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
56
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
57
    progress,
58
    trace,
59
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
60
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
61
from bzrlib.plugins.fastimport import (
62
    helpers,
63
    marks_file,
64
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
65
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
66
from fastimport import commands
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
67
from fastimport.helpers import (
68
    binary_stream,
69
    single_plural,
70
    )
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
71
72
73
def _get_output_stream(destination):
    """Open the binary stream the export is written to.

    :param destination: path of the output file; None or '-' selects
        standard output.
    :return: a writable binary stream. When the destination name ends
        with the '.gz' extension the stream compresses with gzip.
    """
    if destination is None or destination == '-':
        return binary_stream(sys.stdout)
    # Require the real '.gz' extension: a bare 'gz' suffix would also
    # match unrelated names such as 'outgz' and silently compress them.
    if destination.endswith('.gz'):
        import gzip
        return gzip.open(destination, 'wb')
    return open(destination, 'wb')
81
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
82
# from dulwich.repo:
83
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed separately
    # to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 0x20 is the space character, so this rejects ASCII control
        # characters. (Written in hex because the old octal spelling
        # "040" does not parse on Python 3.)
        if ord(c) < 0x20 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
113
114
0.64.173 by Ian Clatworthy
add -r option to fast-export
115
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
116
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
117
0.64.222 by Ian Clatworthy
Support an explicit output destination for bzr fast-export
118
    def __init__(self, source, destination, git_branch=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False):
        """Export branch data in fast import format.

        :param source: location the branch is opened from when run() is
          called
        :param destination: where the output goes; it is handed to
          _get_output_stream()
        :param git_branch: branch name the commits are emitted on (used
          as 'refs/heads/<git_branch>')
        :param checkpoint: when greater than zero, the marks are saved
          and a checkpoint command is emitted every that many commits
        :param import_marks_file: marks file to load existing marks from,
          if any
        :param export_marks_file: marks file to save the marks to, if any
        :param revision: revision range to restrict the export to; when
          false the whole history is exported
        :param verbose: if True, progress is reported every 100 commits
          rather than every 1000
        :param plain_format: if True, 'classic' fast-import format is
          used without any extended features; if False, the generated
          data is richer and includes information like multiple
          authors, revision properties, etc.
        """
        # What to export and where to send it
        self.source = source
        self.outf = _get_output_stream(destination)
        self.git_branch = git_branch
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.revision = revision
        self.excluded_revisions = set()
        self.plain_format = plain_format
        # Revision.get_apparent_authors() is not always there; when it is
        # missing the single-author get_apparent_author() is used instead.
        self._multi_author_api_available = hasattr(
            bzrlib.revision.Revision, 'get_apparent_authors')
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 1000
        if verbose:
            self.progress_every = 100
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        # Branch names are no longer included in the marks file, so this
        # always starts out empty.
        self.branch_names = {}
        if self.import_marks_file:
            marks_info = marks_file.import_marks(self.import_marks_file)
            if marks_info is not None:
                # The file maps mark -> revision id; we need the inverse.
                inverted = {}
                for mark, revid in marks_info.items():
                    inverted[revid] = mark
                self.revid_to_mark = inverted
0.64.173 by Ian Clatworthy
add -r option to fast-export
160
 
161
    def interesting_history(self):
        """Work out the revisions to export, in the order to emit them.

        As a side effect, when a revision range with a starting point was
        requested, self.excluded_revisions is set to the revisions
        reachable from that starting point.

        :return: list of revision ids, being the merge-sorted revisions of
            the branch in reversed order
        """
        start_rev_id = None
        end_rev_id = None
        if self.revision:
            rev1, rev2 = builtins._get_revision_range(self.revision,
                self.branch, "fast-export")
            start_rev_id = rev1.rev_id
            end_rev_id = rev2.rev_id
        self.note("Calculating the revisions to include ...")
        selected = [rev_id for rev_id, _, _, _ in
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)]
        selected.reverse()
        # If a starting point was given, we need to later check that we don't
        # start emitting revisions from before that point. Collect the
        # revisions to exclude now ...
        if start_rev_id is not None:
            self.note("Calculating the revisions to exclude ...")
            excluded = set()
            for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(
                start_rev_id):
                excluded.add(rev_id)
            self.excluded_revisions = excluded
        return selected
0.64.173 by Ian Clatworthy
add -r option to fast-export
181
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
182
    def run(self):
        """Open the source branch and write the whole export.

        Features (unless in plain format), commits and tags are emitted
        while the repository is read-locked; afterwards the marks are
        saved and the statistics are reported.
        """
        # Open the source
        opened = bzrlib.branch.Branch.open_containing(self.source)
        self.branch = opened[0]

        # Export the data
        self.branch.repository.lock_read()
        try:
            revids = self.interesting_history()
            self._commit_total = len(revids)
            self.note("Starting export of %d revisions ..." %
                self._commit_total)
            if not self.plain_format:
                self.emit_features()
            for revid in revids:
                self.emit_commit(revid, self.git_branch)
            if self.branch.supports_tags():
                self.emit_tags()
        finally:
            self.branch.repository.unlock()

        # Save the marks if requested, then summarise
        self._save_marks()
        self.dump_stats()
205
206
    def note(self, msg, *args):
        """Send a note to the trace log, prefixed with the time of day.

        :param msg: the message (may contain format codes for args)
        :param args: arguments passed through to trace.note()
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
210
211
    def warning(self, msg, *args):
        """Send a warning to the trace log, prefixed with the time of day.

        :param msg: the message (may contain format codes for args)
        :param args: arguments passed through to trace.warning()
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
215
216
    def _time_of_day(self):
217
        """Time of day as a string."""
218
        # Note: this is a separate method so tests can patch in a fixed value
219
        return time.strftime("%H:%M:%S")
220
221
    def report_progress(self, commit_count, details=''):
222
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
223
            if self._commit_total:
224
                counts = "%d/%d" % (commit_count, self._commit_total)
225
            else:
226
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
227
            minutes = (time.time() - self._start_time) / 60
228
            rate = commit_count * 1.0 / minutes
229
            if rate > 10:
230
                rate_str = "at %.0f/minute " % rate
231
            else:
232
                rate_str = "at %.1f/minute " % rate
233
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
234
235
    def dump_stats(self):
        """Note how many revisions have a mark and the time taken so far."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
241
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
242
    def print_cmd(self, cmd):
243
        self.outf.write("%r\n" % cmd)
244
0.79.2 by Ian Clatworthy
extend & use marks_file API
245
    def _save_marks(self):
246
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
247
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
248
            marks_file.export_marks(self.export_marks_file, revision_ids)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
249
 
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
250
    def is_empty_dir(self, tree, path):
251
        path_id = tree.path2id(path)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
252
        if path_id is None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
253
            self.warning("Skipping empty_dir detection - no file_id for %s" %
254
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
255
            return False
256
257
        # Continue if path is not a directory
258
        if tree.kind(path_id) != 'directory':
259
            return False
260
261
        # Use treewalk to find the contents of our directory
262
        contents = list(tree.walkdirs(prefix=path))[0]
263
        if len(contents[1]) == 0:
264
            return True
265
        else:
266
            return False
267
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
268
    def emit_features(self):
        """Output a feature command for each known feature, in sorted order."""
        feature_names = sorted(commands.FEATURE_NAMES)
        for feature_name in feature_names:
            feature_cmd = commands.FeatureCommand(feature_name)
            self.print_cmd(feature_cmd)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
271
0.68.1 by Pieter de Bie
Classify bzr-fast-export
272
    def emit_commit(self, revid, git_branch):
        """Output the commit command for one revision.

        Nothing is output for a revision that already has a mark, that is
        excluded, or that cannot be found in the repository (a ghost,
        which is recorded with the mark -1).

        :param revid: id of the revision to export
        :param git_branch: name of the branch to put the commit on; a
            temporary branch name is used instead for a parentless commit
            that is not the first one recorded
        """
        already_marked = revid in self.revid_to_mark
        if already_marked or revid in self.excluded_revisions:
            return

        # Get the Revision object
        try:
            revobj = self.branch.repository.get_revision(revid)
        except bazErrors.NoSuchRevision:
            # This is a ghost revision. Mark it as not found and next!
            self.revid_to_mark[revid] = -1
            return

        # Get the primary parent
        # TODO: Consider the excluded revisions when deciding the parents.
        # Currently, a commit with parents that are excluded ought to be
        # triggering the git_branch calculation below (and it is not).
        # IGC 20090824
        ncommits = len(self.revid_to_mark)
        if len(revobj.parent_ids) != 0:
            parent = revobj.parent_ids[0]
        else:
            if ncommits:
                # This is a parentless commit but it's not the first one
                # output. We need to create a new temporary branch for it
                # otherwise git-fast-import will assume the previous commit
                # was this one's parent
                git_branch = self._next_tmp_branch_name()
            parent = bzrlib.revision.NULL_REVISION

        # Print the commit
        git_ref = 'refs/heads/%s' % (git_branch,)
        mark = ncommits + 1
        self.revid_to_mark[revid] = mark
        file_cmds = self._get_filecommands(parent, revid)
        commit_cmd = self._get_commit_command(git_ref, mark, revobj,
            file_cmds)
        self.print_cmd(commit_cmd)

        # Report progress and checkpoint if it's time for that
        self.report_progress(ncommits)
        checkpoint_due = (self.checkpoint > 0 and ncommits
            and ncommits % self.checkpoint == 0)
        if checkpoint_due:
            self.note("Exported %i commits - adding checkpoint to output"
                % ncommits)
            self._save_marks()
            self.print_cmd(commands.CheckpointCommand())
318
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
319
    def _get_name_email(self, user):
320
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
321
            # If the email isn't inside <>, we need to use it as the name
322
            # in order for things to round-trip correctly.
323
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
324
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
325
            email = ''
326
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
327
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
328
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
329
330
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the commit command for a revision.

        :param git_ref: the ref the commit is made on
        :param mark: the mark assigned to the commit
        :param revobj: the Revision object being exported
        :param file_cmds: the file commands describing the changes made
        :return: a commands.CommitCommand; the revision object itself is
            left unmodified
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            # Slice rather than pop() so the list handed back by the
            # revision is not altered.
            authors = revobj.get_apparent_authors()
            author = authors[0]
            more_authors = authors[1:]
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            # A ghost either has no mark at all or was recorded by
            # emit_commit() with the sentinel mark -1; neither may be
            # named as a parent.
            parent_mark = self.revid_to_mark.get(p, -1)
            if parent_mark == -1:
                continue
            non_ghost_parents.append(":%s" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Filter a copy: deleting from revobj.properties directly
            # would strip the properties from the Revision object for
            # anything else holding on to it.
            properties = dict(revobj.properties)
            for prop in self.properties_to_exclude:
                properties.pop(prop, None)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds), more_authors=more_author_info,
            properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
392
393
    def _get_revision_trees(self, parent, revision_id):
        """Get the trees to compare in order to export a revision.

        :param parent: id of the revision to diff against
        :param revision_id: id of the revision being exported
        :return: a (tree_old, tree_new) tuple. tree_new is None when the
            inventory of revision_id is malformed. When the inventory of
            parent is malformed, tree_old is the tree of the parent's own
            first parent, or of the null revision if it has none.
        """
        repository = self.branch.repository
        try:
            tree_old = repository.revision_tree(parent)
        except bazErrors.UnexpectedInventoryFormat:
            self.warning("Parent is malformed - diffing against previous parent")
            # We can't find the old parent. Let's diff against his parent
            pp = repository.get_revision(parent)
            if pp.parent_ids:
                fallback = pp.parent_ids[0]
            else:
                # The malformed parent has no parent of its own: compare
                # against the null revision instead of failing with an
                # IndexError.
                fallback = bzrlib.revision.NULL_REVISION
            tree_old = repository.revision_tree(fallback)
        tree_new = None
        try:
            tree_new = repository.revision_tree(revision_id)
        except bazErrors.UnexpectedInventoryFormat:
            # We can't really do anything anymore
            self.warning("Revision %s is malformed - skipping" % revision_id)
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
408
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
409
    def _get_filecommands(self, parent, revision_id):
        """Get the list of FileCommands for the changes between two revisions.

        :param parent: id of the revision to compare against
        :param revision_id: id of the revision being exported
        :return: a list of file commands; empty when either tree could
            not be obtained
        """
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
        if not(tree_old and tree_new):
            # Something is wrong with this revision - ignore the filecommands
            return []

        changes = tree_new.changes_from(tree_old)

        # Make "modified" have 3-tuples, as added does
        my_modified = [entry[0:3] for entry in changes.modified]

        # The potential interaction between renames and deletes is messy.
        # Handle it here ...
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
            changes.renamed, changes.removed, revision_id, tree_old)

        # Map kind changes to a delete followed by an add
        for path, id_, kind1, kind2 in changes.kind_changed:
            path = self._adjust_path_for_renames(path, renamed, revision_id)
            # IGC: I don't understand why a delete is needed here.
            # In fact, it seems harmful? If you uncomment this line,
            # please file a bug explaining why you needed to.
            #file_cmds.append(commands.FileDeleteCommand(path))
            my_modified.append((path, id_, kind2))

        # Record modifications
        to_record = changes.added + my_modified + rd_modifies
        for path, id_, kind in to_record:
            if kind == 'file':
                text = tree_new.get_file_text(id_)
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('file',
                    tree_new.is_executable(id_))
                file_cmds.append(
                    commands.FileModifyCommand(utf8_path, mode, None, text))
            elif kind == 'symlink':
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('symlink', False)
                target = tree_new.get_symlink_target(id_)
                file_cmds.append(
                    commands.FileModifyCommand(utf8_path, mode, None, target))
            elif kind == 'directory':
                # Directories are only exported in the extended format
                if self.plain_format:
                    continue
                utf8_path = path.encode("utf-8")
                mode = helpers.kind_to_mode('directory', False)
                file_cmds.append(
                    commands.FileModifyCommand(utf8_path, mode, None, None))
            else:
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
                    (path, kind))
        return file_cmds
455
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
456
    def _process_renames_and_deletes(self, renames, deletes,
457
        revision_id, tree_old):
458
        file_cmds = []
459
        modifies = []
460
        renamed = []
461
462
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
463
        # In a nutshell, there are several nasty cases:
464
        #
465
        # 1) bzr rm a; bzr mv b a; bzr commit
466
        # 2) bzr mv x/y z; bzr rm x; commmit
467
        #
468
        # The first must come out with the delete first like this:
469
        #
470
        # D a
471
        # R b a
472
        #
473
        # The second case must come out with the rename first like this:
474
        #
475
        # R x/y z
476
        # D x
477
        #
478
        # So outputting all deletes first or all renames first won't work.
479
        # Instead, we need to make multiple passes over the various lists to
480
        # get the ordering right.
481
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
482
        must_be_renamed = {}
483
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
484
        deleted_paths = set([p for p, _, _ in deletes])
485
        for (oldpath, newpath, id_, kind,
486
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
487
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
488
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
489
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
490
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
491
                deleted_paths.remove(newpath)
492
            if (self.is_empty_dir(tree_old, oldpath)):
493
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
494
                    revision_id))
495
                continue
496
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
497
            #    revision_id)
498
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
499
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
500
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
501
                file_cmds.append(
502
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
503
            if text_modified or meta_modified:
504
                modifies.append((newpath, id_, kind))
505
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
506
            # Renaming a directory implies all children must be renamed.
507
            # Note: changes_from() doesn't handle this
0.64.329 by Jelmer Vernooij
Support exporting files that are changed into directories.
508
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
509
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
510
                    if e.kind == 'directory' and self.plain_format:
511
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
512
                    old_child_path = osutils.pathjoin(oldpath, p)
513
                    new_child_path = osutils.pathjoin(newpath, p)
514
                    must_be_renamed[old_child_path] = new_child_path
515
516
        # Add children not already renamed
517
        if must_be_renamed:
518
            renamed_already = set(old_to_new.keys())
519
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
520
            for old_child_path in sorted(still_to_be_renamed):
521
                new_child_path = must_be_renamed[old_child_path]
522
                if self.verbose:
523
                    self.note("implicitly renaming %s => %s" % (old_child_path,
524
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
525
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
526
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
527
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
528
        # Record remaining deletes
529
        for path, id_, kind in deletes:
530
            if path not in deleted_paths:
531
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
532
            if kind == 'directory' and self.plain_format:
533
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
534
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
535
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
536
        return file_cmds, modifies, renamed
537
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
538
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
539
        # If a previous rename is found, we should adjust the path
540
        for old, new in renamed:
541
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
542
                self.note("Changing path %s given rename to %s in revision %s"
543
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
544
                path = new
545
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
546
                self.note(
547
                    "Adjusting path %s given rename of %s to %s in revision %s"
548
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
549
                path = path.replace(old + "/", new + "/")
550
        return path
551
0.68.1 by Pieter de Bie
Classify bzr-fast-export
552
    def emit_tags(self):
        """Output a reset command for every tag of the branch.

        A tag whose revision has no entry in self.revid_to_mark is skipped
        with a warning. In plain format, a tag whose ref name fails
        check_ref_format() is skipped with a warning as well.
        """
        tag_dict = self.branch.tags.get_tag_dict()
        for tag_name, tagged_revid in tag_dict.items():
            try:
                mark = self.revid_to_mark[tagged_revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag_name, tagged_revid))
                continue

            git_ref = 'refs/tags/%s' % tag_name.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                self.warning('not creating tag %r as its name would not be '
                             'valid in git.', git_ref)
                continue

            # Marks are referenced as ":<number>" in the output stream
            reset_cmd = commands.ResetCommand(git_ref, ":" + str(mark))
            self.print_cmd(reset_cmd)
0.68.1 by Pieter de Bie
Classify bzr-fast-export
566
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
567
    def _next_tmp_branch_name(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
568
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
569
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
570
        if prefix not in self.branch_names:
571
            self.branch_names[prefix] = 0
572
        else:
573
            self.branch_names[prefix] += 1
574
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
575
        return prefix