/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
#
0.79.10 by Ian Clatworthy
documentation clean-ups
19
# Original Copyright (c) 2008 Adeodato Simó
20
# Original License: MIT (See exporters/bzr-fast-export.LICENSE)
21
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
22
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
23
24
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
25
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
26
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
27
# is not updated (because the parent of commit is already merged, so we don't
28
# set new_git_branch to the previously used name)
29
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
30
from email.Utils import parseaddr
0.64.173 by Ian Clatworthy
add -r option to fast-export
31
import sys, time
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
32
33
import bzrlib.branch
34
import bzrlib.revision
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
35
from bzrlib import (
36
    builtins,
37
    errors as bazErrors,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
38
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
39
    progress,
40
    trace,
41
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
42
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
43
from bzrlib.plugins.fastimport import (
44
    helpers,
45
    marks_file,
46
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
47
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
48
from fastimport import commands
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
49
from fastimport.helpers import (
50
    binary_stream,
51
    single_plural,
52
    )
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
53
54
55
def _get_output_stream(destination):
    """Open the stream that the fast-export data is written to.

    :param destination: path of the file to write, or None / '-' to
      write to standard output
    :return: ``sys.stdout`` wrapped by ``binary_stream`` for None or '-',
      a gzip file object when the destination name ends in 'gz',
      otherwise a regular file opened in binary write mode
    """
    to_stdout = destination is None or destination == '-'
    if to_stdout:
        return binary_stream(sys.stdout)
    if not destination.endswith('gz'):
        return open(destination, 'wb')
    # gzip is only needed for compressed destinations, so import it lazily
    import gzip
    return gzip.open(destination, 'wb')
63
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
64
# from dulwich.repo:
65
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed separately
    # to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        # Also rejects the empty string, so refname[-1] below is always safe.
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 32 is the space character (octal 040): anything below it is an
        # ASCII control character. A decimal literal is used because the
        # bare-zero octal form "040" is a syntax error on Python 3.
        if ord(c) < 32 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
95
96
0.64.173 by Ian Clatworthy
add -r option to fast-export
97
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
98
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
99
0.64.222 by Ian Clatworthy
Support an explicit output destination for bzr fast-export
100
    def __init__(self, source, destination, git_branch=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False):
        """Export branch data in fast import format.

        :param source: location handed to Branch.open_containing() by run()
        :param destination: where the output goes; passed to
          _get_output_stream()
        :param git_branch: branch name that run() passes to emit_commit()
          for every revision
        :param checkpoint: when greater than zero, emit_commit() saves the
          marks and emits a checkpoint command every ``checkpoint`` commits
        :param import_marks_file: if set, marks are loaded from this file
          to seed the revision-id -> mark table
        :param export_marks_file: if set, _save_marks() writes the marks
          to this file
        :param revision: optional revision range; when set,
          interesting_history() restricts the export to it
        :param verbose: if True, progress is reported every 100 commits
          instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
          used without any extended features; if False, the generated
          data is richer and includes information like multiple
          authors, revision properties, etc.
        """
        self.source = source
        self.outf = _get_output_stream(destination)
        self.git_branch = git_branch
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.revision = revision
        self.excluded_revisions = set()
        self.plain_format = plain_format
        self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
            'get_apparent_authors')
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        self.progress_every = 1000
        if verbose:
            self.progress_every = 100
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        # Branch names are no longer included in the marks file, so this
        # table always starts out empty.
        self.branch_names = {}
        if not self.import_marks_file:
            return
        marks_info = marks_file.import_marks(self.import_marks_file)
        if marks_info is None:
            return
        # The marks file maps mark -> revision id; we need the inverse.
        inverted = {}
        for mark, revid in marks_info.items():
            inverted[revid] = mark
        self.revid_to_mark = inverted
0.64.173 by Ian Clatworthy
add -r option to fast-export
142
 
143
    def interesting_history(self):
144
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
145
            rev1, rev2 = builtins._get_revision_range(self.revision,
146
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
147
            start_rev_id = rev1.rev_id
148
            end_rev_id = rev2.rev_id
149
        else:
150
            start_rev_id = None
151
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
152
        self.note("Calculating the revisions to include ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
153
        view_revisions = reversed([rev_id for rev_id, _, _, _ in
154
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)])
0.64.173 by Ian Clatworthy
add -r option to fast-export
155
        # If a starting point was given, we need to later check that we don't
156
        # start emitting revisions from before that point. Collect the
157
        # revisions to exclude now ...
158
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
159
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
160
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
161
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
162
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
163
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
164
    def run(self):
        """Run the export from ``self.source``.

        Opens the branch containing ``self.source``, emits the feature
        commands (unless in plain format), one commit per interesting
        revision and then the tags if the branch supports them, all under
        a repository read lock. Afterwards the marks are saved and the
        statistics are logged.
        """
        # Open the source
        self.branch = bzrlib.branch.Branch.open_containing(self.source)[0]

        # Export the data
        self.branch.repository.lock_read()
        try:
            revids = self.interesting_history()
            self._commit_total = len(revids)
            start_msg = ("Starting export of %d revisions ..." %
                self._commit_total)
            self.note(start_msg)
            if not self.plain_format:
                self.emit_features()
            for revid in revids:
                self.emit_commit(revid, self.git_branch)
            if self.branch.supports_tags():
                self.emit_tags()
        finally:
            self.branch.repository.unlock()

        # Save the marks if requested
        self._save_marks()
        self.dump_stats()
187
188
    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message (may contain %-style placeholders)
        :param args: values forwarded to trace.note() along with the message
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        trace.note(stamped, *args)
192
193
    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message (may contain %-style placeholders)
        :param args: values forwarded to trace.warning() along with the
          message
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(stamped, *args)
197
198
    def _time_of_day(self):
199
        """Time of day as a string."""
200
        # Note: this is a separate method so tests can patch in a fixed value
201
        return time.strftime("%H:%M:%S")
202
203
    def report_progress(self, commit_count, details=''):
204
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
205
            if self._commit_total:
206
                counts = "%d/%d" % (commit_count, self._commit_total)
207
            else:
208
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
209
            minutes = (time.time() - self._start_time) / 60
210
            rate = commit_count * 1.0 / minutes
211
            if rate > 10:
212
                rate_str = "at %.0f/minute " % rate
213
            else:
214
                rate_str = "at %.1f/minute " % rate
215
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
216
217
    def dump_stats(self):
        """Log the number of entries in the mark table and the elapsed time."""
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        rc = len(self.revid_to_mark)
        noun = single_plural(rc, "revision", "revisions")
        self.note("Exported %d %s in %s", rc, noun, time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
223
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
224
    def print_cmd(self, cmd):
225
        self.outf.write("%r\n" % cmd)
226
0.79.2 by Ian Clatworthy
extend & use marks_file API
227
    def _save_marks(self):
228
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
229
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
230
            marks_file.export_marks(self.export_marks_file, revision_ids)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
231
 
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
232
    def is_empty_dir(self, tree, path):
233
        path_id = tree.path2id(path)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
234
        if path_id is None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
235
            self.warning("Skipping empty_dir detection - no file_id for %s" %
236
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
237
            return False
238
239
        # Continue if path is not a directory
240
        if tree.kind(path_id) != 'directory':
241
            return False
242
243
        # Use treewalk to find the contents of our directory
244
        contents = list(tree.walkdirs(prefix=path))[0]
245
        if len(contents[1]) == 0:
246
            return True
247
        else:
248
            return False
249
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
250
    def emit_features(self):
        """Output one feature command per known feature name, sorted by name."""
        for name in sorted(commands.FEATURE_NAMES):
            feature_cmd = commands.FeatureCommand(name)
            self.print_cmd(feature_cmd)
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
253
0.68.1 by Pieter de Bie
Classify bzr-fast-export
254
    def emit_commit(self, revid, git_branch):
0.64.173 by Ian Clatworthy
add -r option to fast-export
255
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
256
            return
257
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
258
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
259
        try:
260
            revobj = self.branch.repository.get_revision(revid)
261
        except bazErrors.NoSuchRevision:
262
            # This is a ghost revision. Mark it as not found and next!
263
            self.revid_to_mark[revid] = -1
264
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
265
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
266
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
267
        # TODO: Consider the excluded revisions when deciding the parents.
268
        # Currently, a commit with parents that are excluded ought to be
269
        # triggering the git_branch calculation below (and it is not).
270
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
271
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
272
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
273
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
274
            if ncommits:
275
                # This is a parentless commit but it's not the first one
276
                # output. We need to create a new temporary branch for it
277
                # otherwise git-fast-import will assume the previous commit
278
                # was this one's parent
279
                git_branch = self._next_tmp_branch_name()
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
280
            parent = bzrlib.revision.NULL_REVISION
281
        else:
282
            parent = revobj.parent_ids[0]
283
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
284
        # Print the commit
285
        git_ref = 'refs/heads/%s' % (git_branch,)
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
286
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
287
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
288
        file_cmds = self._get_filecommands(parent, revid)
289
        self.print_cmd(self._get_commit_command(git_ref, mark, revobj,
290
            file_cmds))
291
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
292
        # Report progress and checkpoint if it's time for that
293
        self.report_progress(ncommits)
294
        if (self.checkpoint > 0 and ncommits
295
            and ncommits % self.checkpoint == 0):
296
            self.note("Exported %i commits - adding checkpoint to output"
297
                % ncommits)
298
            self._save_marks()
299
            self.print_cmd(commands.CheckpointCommand())
300
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
301
    def _get_name_email(self, user):
302
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
303
            # If the email isn't inside <>, we need to use it as the name
304
            # in order for things to round-trip correctly.
305
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
306
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
307
            email = ''
308
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
309
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
310
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
311
312
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the commit command for one revision.

        :param git_ref: full ref name the commit is written to
        :param mark: the mark assigned to this commit
        :param revobj: the revision object being exported
        :param file_cmds: the file commands belonging to the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            more_authors = revobj.get_apparent_authors()
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            try:
                parent_mark = self.revid_to_mark[p]
            except KeyError:
                # ghost that was never visited - ignore
                continue
            if parent_mark == -1:
                # ghost that emit_commit() recorded with its -1 placeholder;
                # ":-1" is not a mark any commit was output under, so it
                # must not be referenced as a parent.
                continue
            non_ghost_parents.append(":%s" % parent_mark)
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Work on a copy so the revision object's own properties
            # mapping is not modified as a side effect of exporting it.
            properties = dict(revobj.properties)
            for prop in self.properties_to_exclude:
                properties.pop(prop, None)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds),
            more_authors=more_author_info, properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
374
375
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
376
        try:
377
            tree_old = self.branch.repository.revision_tree(parent)
378
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
379
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
380
            # We can't find the old parent. Let's diff against his parent
381
            pp = self.branch.repository.get_revision(parent)
382
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
383
        tree_new = None
384
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
385
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
386
        except bazErrors.UnexpectedInventoryFormat:
387
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
388
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
389
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
390
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
391
    def _get_filecommands(self, parent, revision_id):
392
        """Get the list of FileCommands for the changes between two revisions."""
393
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
394
        if not(tree_old and tree_new):
395
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
396
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
397
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
398
        changes = tree_new.changes_from(tree_old)
399
400
        # Make "modified" have 3-tuples, as added does
401
        my_modified = [ x[0:3] for x in changes.modified ]
402
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
403
        # The potential interaction between renames and deletes is messy.
404
        # Handle it here ...
405
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
406
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
407
408
        # Map kind changes to a delete followed by an add
409
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
410
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
411
            # IGC: I don't understand why a delete is needed here.
412
            # In fact, it seems harmful? If you uncomment this line,
413
            # please file a bug explaining why you needed to.
414
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
415
            my_modified.append((path, id_, kind2))
416
417
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
418
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
419
            if kind == 'file':
420
                text = tree_new.get_file_text(id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
421
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
422
                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
423
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
424
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
425
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
426
                    helpers.kind_to_mode('symlink', False),
427
                    None, tree_new.get_symlink_target(id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
428
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
429
                if not self.plain_format:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
430
                    file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
431
                        helpers.kind_to_mode('directory', False),
432
                        None, None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
433
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
434
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
435
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
436
        return file_cmds
437
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
438
    def _process_renames_and_deletes(self, renames, deletes,
439
        revision_id, tree_old):
440
        file_cmds = []
441
        modifies = []
442
        renamed = []
443
444
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
445
        # In a nutshell, there are several nasty cases:
446
        #
447
        # 1) bzr rm a; bzr mv b a; bzr commit
448
        # 2) bzr mv x/y z; bzr rm x; commmit
449
        #
450
        # The first must come out with the delete first like this:
451
        #
452
        # D a
453
        # R b a
454
        #
455
        # The second case must come out with the rename first like this:
456
        #
457
        # R x/y z
458
        # D x
459
        #
460
        # So outputting all deletes first or all renames first won't work.
461
        # Instead, we need to make multiple passes over the various lists to
462
        # get the ordering right.
463
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
464
        must_be_renamed = {}
465
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
466
        deleted_paths = set([p for p, _, _ in deletes])
467
        for (oldpath, newpath, id_, kind,
468
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
469
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
470
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
471
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
472
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
473
                deleted_paths.remove(newpath)
474
            if (self.is_empty_dir(tree_old, oldpath)):
475
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
476
                    revision_id))
477
                continue
478
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
479
            #    revision_id)
480
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
481
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
482
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
483
                file_cmds.append(
484
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
485
            if text_modified or meta_modified:
486
                modifies.append((newpath, id_, kind))
487
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
488
            # Renaming a directory implies all children must be renamed.
489
            # Note: changes_from() doesn't handle this
0.64.329 by Jelmer Vernooij
Support exporting files that are changed into directories.
490
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
491
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
492
                    if e.kind == 'directory' and self.plain_format:
493
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
494
                    old_child_path = osutils.pathjoin(oldpath, p)
495
                    new_child_path = osutils.pathjoin(newpath, p)
496
                    must_be_renamed[old_child_path] = new_child_path
497
498
        # Add children not already renamed
499
        if must_be_renamed:
500
            renamed_already = set(old_to_new.keys())
501
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
502
            for old_child_path in sorted(still_to_be_renamed):
503
                new_child_path = must_be_renamed[old_child_path]
504
                if self.verbose:
505
                    self.note("implicitly renaming %s => %s" % (old_child_path,
506
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
507
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
508
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
509
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
510
        # Record remaining deletes
511
        for path, id_, kind in deletes:
512
            if path not in deleted_paths:
513
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
514
            if kind == 'directory' and self.plain_format:
515
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
516
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
517
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
518
        return file_cmds, modifies, renamed
519
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
520
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
521
        # If a previous rename is found, we should adjust the path
522
        for old, new in renamed:
523
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
524
                self.note("Changing path %s given rename to %s in revision %s"
525
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
526
                path = new
527
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
528
                self.note(
529
                    "Adjusting path %s given rename of %s to %s in revision %s"
530
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
531
                path = path.replace(old + "/", new + "/")
532
        return path
533
0.68.1 by Pieter de Bie
Classify bzr-fast-export
534
    def emit_tags(self):
        """Print a reset command for every tag that points at a known mark.

        Tags whose revision has no entry in revid_to_mark are skipped with
        a warning.  In plain format, tags whose ref name is rejected by
        check_ref_format() are skipped with a warning as well.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
            else:
                git_ref = 'refs/tags/%s' % tag.encode("utf-8")
                if self.plain_format and not check_ref_format(git_ref):
                    # Format the message here, like every other warning()
                    # and note() call in this file, instead of relying on
                    # warning() accepting extra format arguments.
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.' % (git_ref,))
                    continue
                self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
548
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
549
    def _next_tmp_branch_name(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
550
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
551
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
552
        if prefix not in self.branch_names:
553
            self.branch_names[prefix] = 0
554
        else:
555
            self.branch_names[prefix] += 1
556
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
557
        return prefix