/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.128 by Ian Clatworthy
fix encoding issue in bzr_exporter (Teemu Likonen)
1
# -*- coding: utf-8 -*-
2
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
3
# Copyright (C) 2008 Canonical Ltd
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
#
15
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
16
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
17
#
0.64.333 by Jelmer Vernooij
Inline bzr-fast-export license.
18
# Based on bzr-fast-export
19
# Copyright (c) 2008 Adeodato Simó
20
#
21
# Permission is hereby granted, free of charge, to any person obtaining
22
# a copy of this software and associated documentation files (the
23
# "Software"), to deal in the Software without restriction, including
24
# without limitation the rights to use, copy, modify, merge, publish,
25
# distribute, sublicense, and/or sell copies of the Software, and to
26
# permit persons to whom the Software is furnished to do so, subject to
27
# the following conditions:
28
#
29
# The above copyright notice and this permission notice shall be included
30
# in all copies or substantial portions of the Software.
31
#
32
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
35
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
36
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
37
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
38
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0.79.10 by Ian Clatworthy
documentation clean-ups
39
#
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
40
# vim: fileencoding=utf-8
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
41
42
"""Core engine for the fast-export command."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
43
0.79.7 by Ian Clatworthy
trivial bzr_exporter clean-ups
44
# TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
45
# is not updated (because the parent of commit is already merged, so we don't
46
# set new_git_branch to the previously used name)
47
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
48
from email.Utils import parseaddr
0.133.1 by Oleksandr Usov
Add function to rewrite refnames & tests for it
49
import sys, time, re
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
50
51
import bzrlib.branch
52
import bzrlib.revision
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
53
from bzrlib import (
54
    builtins,
55
    errors as bazErrors,
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
56
    osutils,
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
57
    progress,
58
    trace,
59
    )
0.79.4 by Ian Clatworthy
use note and warning APIs
60
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
61
from bzrlib.plugins.fastimport import (
62
    helpers,
63
    marks_file,
64
    )
0.79.2 by Ian Clatworthy
extend & use marks_file API
65
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
66
from fastimport import commands
0.64.284 by Jelmer Vernooij
Fix import of single_plural.
67
from fastimport.helpers import (
68
    binary_stream,
69
    single_plural,
70
    )
0.64.282 by Jelmer Vernooij
Fix output stream to stdout for bzr fast-export.
71
72
73
def _get_output_stream(destination):
74
    if destination is None or destination == '-':
75
        return binary_stream(sys.stdout)
76
    elif destination.endswith('gz'):
77
        import gzip
78
        return gzip.open(destination, 'wb')
79
    else:
80
        return open(destination, 'wb')
81
0.64.328 by Jelmer Vernooij
In "plain" mode, skip tags that contain characters not valid in Git.
82
# from dulwich.repo:
83
def check_ref_format(refname):
    """Check if a refname is correctly formatted.

    Implements all the same rules as git-check-ref-format[1].

    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html

    :param refname: The refname to check
    :return: True if refname is valid, False otherwise
    """
    # These could be combined into one big expression, but are listed separately
    # to parallel [1].
    if '/.' in refname or refname.startswith('.'):
        return False
    if '/' not in refname:
        # Also rejects the empty string, so refname[-1] below is safe.
        return False
    if '..' in refname:
        return False
    for c in refname:
        # 0x20 is the space character: everything below it is an ASCII
        # control character. (Written in hex because the old-style octal
        # literal 040 is not valid syntax on every Python version.)
        if ord(c) < 0x20 or c in '\177 ~^:?*[':
            return False
    if refname[-1] in '/.':
        return False
    if refname.endswith('.lock'):
        return False
    if '@{' in refname:
        return False
    if '\\' in refname:
        return False
    return True
113
0.133.3 by Oleksandr Usov
Implement comments from patch review:
114
115
def sanitize_ref_name_for_git(refname):
    """Rewrite refname so that it will be accepted by git-fast-import.

    Every sequence that check_ref_format rejects is replaced by a single
    underscore. For the detailed rules see check_ref_format.

    By rewriting the refname we are breaking uniqueness guarantees provided
    by bzr so we have to manually verify that resulting ref names are unique.

    :param refname: refname to rewrite
    :return: new refname
    """
    new_refname = re.sub(
        # '/.' in refname or startswith '.'
        r"/\.|^\."
        # '..' in refname
        r"|\.\."
        # ord(c) < 0x20, i.e. the ASCII control characters
        r"|[\x00-\x1f]"
        # c in '\177 ~^:?*['
        r"|[\177 ~^:?*\[]"
        # last char in "/."
        r"|[/.]$"
        # endswith '.lock' -- the dot must be escaped, otherwise any name
        # ending in "<char>lock" (e.g. "unlock") would be rewritten too
        r"|\.lock$"
        # "@{" in refname
        r"|@\{"
        # "\\" in refname
        r"|\\",
        "_", refname)
    return new_refname
0.64.173 by Ian Clatworthy
add -r option to fast-export
145
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
146
class BzrFastExporter(object):
0.68.1 by Pieter de Bie
Classify bzr-fast-export
147
0.64.222 by Ian Clatworthy
Support an explicit output destination for bzr fast-export
148
    def __init__(self, source, destination, git_branch=None, checkpoint=-1,
        import_marks_file=None, export_marks_file=None, revision=None,
        verbose=False, plain_format=False, rewrite_tags=False):
        """Export branch data in fast import format.

        :param source: location of the branch to export; it is opened with
            Branch.open_containing() when run() is called
        :param destination: where the stream is written, as understood by
            _get_output_stream(); the stream is opened immediately
        :param git_branch: branch name used to build the
            'refs/heads/<name>' ref that commits are emitted on
        :param checkpoint: if greater than zero, the marks are saved and a
            checkpoint command is emitted every ``checkpoint`` commits
        :param import_marks_file: if set, marks are loaded from it with
            marks_file.import_marks(); revisions that already have a mark
            are not exported again
        :param export_marks_file: if set, the marks are written to it
        :param revision: optional revision specification limiting what is
            exported; it is handed to builtins._get_revision_range()
        :param verbose: if True, progress is reported every 100 commits
            instead of every 1000
        :param plain_format: if True, 'classic' fast-import format is
            used without any extended features; if False, the generated
            data is richer and includes information like multiple
            authors, revision properties, etc.
        :param rewrite_tags: if True and if plain_format is set, tag names
            will be rewritten to be git-compatible.
            Otherwise tags which aren't valid for git will be skipped if
            plain_format is set.
        """
        self.source = source
        self.outf = _get_output_stream(destination)
        self.git_branch = git_branch
        self.checkpoint = checkpoint
        self.import_marks_file = import_marks_file
        self.export_marks_file = export_marks_file
        self.revision = revision
        self.excluded_revisions = set()
        self.plain_format = plain_format
        self.rewrite_tags = rewrite_tags
        # Older bzrlib versions only offer Revision.get_apparent_author()
        self._multi_author_api_available = hasattr(bzrlib.revision.Revision,
            'get_apparent_authors')
        # Revision properties that are exported by other means
        self.properties_to_exclude = ['authors', 'author']

        # Progress reporting stuff
        self.verbose = verbose
        if verbose:
            self.progress_every = 100
        else:
            self.progress_every = 1000
        self._start_time = time.time()
        self._commit_total = 0

        # Load the marks and initialise things accordingly
        self.revid_to_mark = {}
        self.branch_names = {}
        if self.import_marks_file:
            marks_info = marks_file.import_marks(self.import_marks_file)
            if marks_info is not None:
                # The file maps mark -> revision id; we need the inverse
                self.revid_to_mark = dict((r, m) for m, r in
                    marks_info.items())
                # These are no longer included in the marks file
                #self.branch_names = marks_info[1]
0.64.173 by Ian Clatworthy
add -r option to fast-export
195
 
196
    def interesting_history(self):
197
        if self.revision:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
198
            rev1, rev2 = builtins._get_revision_range(self.revision,
199
                self.branch, "fast-export")
0.64.173 by Ian Clatworthy
add -r option to fast-export
200
            start_rev_id = rev1.rev_id
201
            end_rev_id = rev2.rev_id
202
        else:
203
            start_rev_id = None
204
            end_rev_id = None
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
205
        self.note("Calculating the revisions to include ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
206
        view_revisions = reversed([rev_id for rev_id, _, _, _ in
207
            self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)])
0.64.173 by Ian Clatworthy
add -r option to fast-export
208
        # If a starting point was given, we need to later check that we don't
209
        # start emitting revisions from before that point. Collect the
210
        # revisions to exclude now ...
211
        if start_rev_id is not None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
212
            self.note("Calculating the revisions to exclude ...")
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
213
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
214
                self.branch.iter_merge_sorted_revisions(start_rev_id)])
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
215
        return list(view_revisions)
0.64.173 by Ian Clatworthy
add -r option to fast-export
216
0.79.1 by Ian Clatworthy
turn bzr-fast-export into a fast-export command
217
    def run(self):
        """Export the source branch to the output stream.

        Opens the branch, emits the features (unless in plain format), one
        commit per interesting revision and finally the tags, all under a
        repository read lock. Afterwards the marks are saved and the
        statistics are reported.
        """
        # Open the source
        self.branch = bzrlib.branch.Branch.open_containing(self.source)[0]

        # Export the data
        self.branch.repository.lock_read()
        try:
            interesting = self.interesting_history()
            self._commit_total = len(interesting)
            self.note("Starting export of %d revisions ..." %
                self._commit_total)
            if not self.plain_format:
                self.emit_features()
            for revid in interesting:
                self.emit_commit(revid, self.git_branch)
            if self.branch.supports_tags():
                self.emit_tags()
        finally:
            self.branch.repository.unlock()

        # NOTE(review): self.outf is neither flushed nor closed here; when
        # it is a file opened by the constructor that is left to garbage
        # collection -- confirm whether an explicit close is wanted.

        # Save the marks if requested
        self._save_marks()
        self.dump_stats()
240
241
    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message; the time of day is put in front of it
        :param args: passed through unchanged to trace.note()
        """
        msg = "%s %s" % (self._time_of_day(), msg)
        trace.note(msg, *args)
245
246
    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message; the time of day and 'WARNING:' are put in
            front of it
        :param args: passed through unchanged to trace.warning()
        """
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        trace.warning(msg, *args)
250
251
    def _time_of_day(self):
252
        """Time of day as a string."""
253
        # Note: this is a separate method so tests can patch in a fixed value
254
        return time.strftime("%H:%M:%S")
255
256
    def report_progress(self, commit_count, details=''):
257
        if commit_count and commit_count % self.progress_every == 0:
0.64.230 by Ian Clatworthy
Fix ghost handling and improve progress tracking in fast-export
258
            if self._commit_total:
259
                counts = "%d/%d" % (commit_count, self._commit_total)
260
            else:
261
                counts = "%d" % (commit_count,)
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
262
            minutes = (time.time() - self._start_time) / 60
263
            rate = commit_count * 1.0 / minutes
264
            if rate > 10:
265
                rate_str = "at %.0f/minute " % rate
266
            else:
267
                rate_str = "at %.1f/minute " % rate
268
            self.note("%s commits exported %s%s" % (counts, rate_str, details))
269
270
    def dump_stats(self):
        """Output a note with the number of marked revisions and the
        time that has passed since this exporter was created."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        # Every entry of revid_to_mark is counted
        rc = len(self.revid_to_mark)
        self.note("Exported %d %s in %s",
            rc, single_plural(rc, "revision", "revisions"),
            time_required)
0.79.2 by Ian Clatworthy
extend & use marks_file API
276
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
277
    def print_cmd(self, cmd):
278
        self.outf.write("%r\n" % cmd)
279
0.79.2 by Ian Clatworthy
extend & use marks_file API
280
    def _save_marks(self):
281
        if self.export_marks_file:
0.64.134 by Ian Clatworthy
fix marks importing in fast-export
282
            revision_ids = dict((m, r) for r, m in self.revid_to_mark.items())
0.125.1 by Ian Clatworthy
Use the new marks file format (introduced in git 1.6 apparently)
283
            marks_file.export_marks(self.export_marks_file, revision_ids)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
284
 
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
285
    def is_empty_dir(self, tree, path):
286
        path_id = tree.path2id(path)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
287
        if path_id is None:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
288
            self.warning("Skipping empty_dir detection - no file_id for %s" %
289
                (path,))
0.68.10 by Pieter de Bie
bzr-fast-export: Don't rename empty directories
290
            return False
291
292
        # Continue if path is not a directory
293
        if tree.kind(path_id) != 'directory':
294
            return False
295
296
        # Use treewalk to find the contents of our directory
297
        contents = list(tree.walkdirs(prefix=path))[0]
298
        if len(contents[1]) == 0:
299
            return True
300
        else:
301
            return False
302
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
303
    def emit_features(self):
        """Output a feature command for every name in
        commands.FEATURE_NAMES, in sorted order."""
        for feature in sorted(commands.FEATURE_NAMES):
            self.print_cmd(commands.FeatureCommand(feature))
0.102.3 by Ian Clatworthy
First cut at exporting additional metadata via 'features'
306
0.68.1 by Pieter de Bie
Classify bzr-fast-export
307
    def emit_commit(self, revid, git_branch):
0.64.173 by Ian Clatworthy
add -r option to fast-export
308
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
309
            return
310
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
311
        # Get the Revision object
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
312
        try:
313
            revobj = self.branch.repository.get_revision(revid)
314
        except bazErrors.NoSuchRevision:
315
            # This is a ghost revision. Mark it as not found and next!
316
            self.revid_to_mark[revid] = -1
317
            return
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
318
 
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
319
        # Get the primary parent
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
320
        # TODO: Consider the excluded revisions when deciding the parents.
321
        # Currently, a commit with parents that are excluded ought to be
322
        # triggering the git_branch calculation below (and it is not).
323
        # IGC 20090824
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
324
        ncommits = len(self.revid_to_mark)
0.100.1 by Ian Clatworthy
Stop fast-export from exceeding the maximum recursion depth
325
        nparents = len(revobj.parent_ids)
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
326
        if nparents == 0:
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
327
            if ncommits:
328
                # This is a parentless commit but it's not the first one
329
                # output. We need to create a new temporary branch for it
330
                # otherwise git-fast-import will assume the previous commit
331
                # was this one's parent
332
                git_branch = self._next_tmp_branch_name()
0.68.4 by Pieter de Bie
bzr-fast-export.py: Add support for ghost commits
333
            parent = bzrlib.revision.NULL_REVISION
334
        else:
335
            parent = revobj.parent_ids[0]
336
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
337
        # Print the commit
338
        git_ref = 'refs/heads/%s' % (git_branch,)
0.98.1 by Gonéri Le Bouder
add the missing ":" since revid_to_mark are "committish"
339
        mark = ncommits + 1
0.64.221 by Ian Clatworthy
backout git-bzr fix as it was breaking fast-export
340
        self.revid_to_mark[revid] = mark
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
341
        file_cmds = self._get_filecommands(parent, revid)
342
        self.print_cmd(self._get_commit_command(git_ref, mark, revobj,
343
            file_cmds))
344
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
345
        # Report progress and checkpoint if it's time for that
346
        self.report_progress(ncommits)
347
        if (self.checkpoint > 0 and ncommits
348
            and ncommits % self.checkpoint == 0):
349
            self.note("Exported %i commits - adding checkpoint to output"
350
                % ncommits)
351
            self._save_marks()
352
            self.print_cmd(commands.CheckpointCommand())
353
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
354
    def _get_name_email(self, user):
355
        if user.find('<') == -1:
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
356
            # If the email isn't inside <>, we need to use it as the name
357
            # in order for things to round-trip correctly.
358
            # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com')
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
359
            name = user
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
360
            email = ''
361
        else:
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
362
            name, email = parseaddr(user)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
363
        return name.encode("utf-8"), email.encode("utf-8")
0.102.16 by Ian Clatworthy
tweak author formatting to use same smart rule as used for committer
364
365
    def _get_commit_command(self, git_ref, mark, revobj, file_cmds):
        """Build the CommitCommand for a revision.

        :param git_ref: the ref the commit is made on
        :param mark: the mark given to the commit
        :param revobj: the Revision object being exported
        :param file_cmds: the file commands belonging to the commit
        :return: a commands.CommitCommand
        """
        # Get the committer and author info
        committer = revobj.committer
        name, email = self._get_name_email(committer)
        committer_info = (name, email, revobj.timestamp, revobj.timezone)
        if self._multi_author_api_available:
            more_authors = revobj.get_apparent_authors()
            author = more_authors.pop(0)
        else:
            more_authors = []
            author = revobj.get_apparent_author()
        if not self.plain_format and more_authors:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = []
            for a in more_authors:
                name, email = self._get_name_email(a)
                more_author_info.append(
                    (name, email, revobj.timestamp, revobj.timezone))
        elif author != committer:
            name, email = self._get_name_email(author)
            author_info = (name, email, revobj.timestamp, revobj.timezone)
            more_author_info = None
        else:
            # The author is the committer: no separate author is output
            author_info = None
            more_author_info = None

        # Get the parents in terms of marks
        non_ghost_parents = []
        for p in revobj.parent_ids:
            if p in self.excluded_revisions:
                continue
            try:
                parent_mark = self.revid_to_mark[p]
                non_ghost_parents.append(":%s" % parent_mark)
            except KeyError:
                # ghost - ignore
                continue
        if non_ghost_parents:
            from_ = non_ghost_parents[0]
            merges = non_ghost_parents[1:]
        else:
            from_ = None
            merges = None

        # Filter the revision properties. Some metadata (like the
        # author information) is already exposed in other ways so
        # don't repeat it here.
        if self.plain_format:
            properties = None
        else:
            # Build a filtered copy. Deleting from revobj.properties itself
            # would strip the author properties from the revision object,
            # which is not ours to modify.
            properties = dict((key, value)
                for key, value in revobj.properties.items()
                if key not in self.properties_to_exclude)

        # Build and return the result
        return commands.CommitCommand(git_ref, mark, author_info,
            committer_info, revobj.message.encode("utf-8"), from_, merges,
            iter(file_cmds),
            more_authors=more_author_info, properties=properties)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
427
428
    def _get_revision_trees(self, parent, revision_id):
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
429
        try:
430
            tree_old = self.branch.repository.revision_tree(parent)
431
        except bazErrors.UnexpectedInventoryFormat:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
432
            self.warning("Parent is malformed - diffing against previous parent")
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
433
            # We can't find the old parent. Let's diff against his parent
434
            pp = self.branch.repository.get_revision(parent)
435
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
436
        tree_new = None
437
        try:
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
438
            tree_new = self.branch.repository.revision_tree(revision_id)
0.68.6 by Pieter de Bie
bzr-fast-export.py: Skip over broken commits.
439
        except bazErrors.UnexpectedInventoryFormat:
440
            # We can't really do anything anymore
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
441
            self.warning("Revision %s is malformed - skipping" % revision_id)
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
442
        return tree_old, tree_new
0.68.1 by Pieter de Bie
Classify bzr-fast-export
443
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
444
    def _get_filecommands(self, parent, revision_id):
445
        """Get the list of FileCommands for the changes between two revisions."""
446
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
447
        if not(tree_old and tree_new):
448
            # Something is wrong with this revision - ignore the filecommands
0.87.1 by David Reitter
fix bug #348038 (call to warning() with two arguments) and handle malformed revisions gracefully by not generating any output
449
            return []
0.68.1 by Pieter de Bie
Classify bzr-fast-export
450
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
451
        changes = tree_new.changes_from(tree_old)
452
453
        # Make "modified" have 3-tuples, as added does
454
        my_modified = [ x[0:3] for x in changes.modified ]
455
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
456
        # The potential interaction between renames and deletes is messy.
457
        # Handle it here ...
458
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
459
            changes.renamed, changes.removed, revision_id, tree_old)
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
460
461
        # Map kind changes to a delete followed by an add
462
        for path, id_, kind1, kind2 in changes.kind_changed:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
463
            path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
464
            # IGC: I don't understand why a delete is needed here.
465
            # In fact, it seems harmful? If you uncomment this line,
466
            # please file a bug explaining why you needed to.
467
            #file_cmds.append(commands.FileDeleteCommand(path))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
468
            my_modified.append((path, id_, kind2))
469
470
        # Record modifications
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
471
        for path, id_, kind in changes.added + my_modified + rd_modifies:
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
472
            if kind == 'file':
473
                text = tree_new.get_file_text(id_)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
474
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
475
                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
476
                    None, text))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
477
            elif kind == 'symlink':
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
478
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
479
                    helpers.kind_to_mode('symlink', False),
480
                    None, tree_new.get_symlink_target(id_)))
0.102.14 by Ian Clatworthy
export and import empty directories
481
            elif kind == 'directory':
0.105.1 by John Whitley
Don't emit directory info when plain format is specified.
482
                if not self.plain_format:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
483
                    file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
484
                        helpers.kind_to_mode('directory', False),
485
                        None, None))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
486
            else:
0.102.14 by Ian Clatworthy
export and import empty directories
487
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
488
                    (path, kind))
0.64.166 by Ian Clatworthy
graceful handling of faulty revisions (David Reitter)
489
        return file_cmds
490
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
491
    def _process_renames_and_deletes(self, renames, deletes,
492
        revision_id, tree_old):
493
        file_cmds = []
494
        modifies = []
495
        renamed = []
496
497
        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
498
        # In a nutshell, there are several nasty cases:
499
        #
500
        # 1) bzr rm a; bzr mv b a; bzr commit
501
        # 2) bzr mv x/y z; bzr rm x; commmit
502
        #
503
        # The first must come out with the delete first like this:
504
        #
505
        # D a
506
        # R b a
507
        #
508
        # The second case must come out with the rename first like this:
509
        #
510
        # R x/y z
511
        # D x
512
        #
513
        # So outputting all deletes first or all renames first won't work.
514
        # Instead, we need to make multiple passes over the various lists to
515
        # get the ordering right.
516
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
517
        must_be_renamed = {}
518
        old_to_new = {}
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
519
        deleted_paths = set([p for p, _, _ in deletes])
520
        for (oldpath, newpath, id_, kind,
521
                text_modified, meta_modified) in renames:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
522
            emit = kind != 'directory' or not self.plain_format
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
523
            if newpath in deleted_paths:
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
524
                if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
525
                    file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
526
                deleted_paths.remove(newpath)
527
            if (self.is_empty_dir(tree_old, oldpath)):
528
                self.note("Skipping empty dir %s in rev %s" % (oldpath,
529
                    revision_id))
530
                continue
531
            #oldpath = self._adjust_path_for_renames(oldpath, renamed,
532
            #    revision_id)
533
            renamed.append([oldpath, newpath])
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
534
            old_to_new[oldpath] = newpath
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
535
            if emit:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
536
                file_cmds.append(
537
                    commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
538
            if text_modified or meta_modified:
539
                modifies.append((newpath, id_, kind))
540
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
541
            # Renaming a directory implies all children must be renamed.
542
            # Note: changes_from() doesn't handle this
0.64.329 by Jelmer Vernooij
Support exporting files that are changed into directories.
543
            if kind == 'directory' and tree_old.kind(id_) == 'directory':
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
544
                for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_):
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
545
                    if e.kind == 'directory' and self.plain_format:
546
                        continue
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
547
                    old_child_path = osutils.pathjoin(oldpath, p)
548
                    new_child_path = osutils.pathjoin(newpath, p)
549
                    must_be_renamed[old_child_path] = new_child_path
550
551
        # Add children not already renamed
552
        if must_be_renamed:
553
            renamed_already = set(old_to_new.keys())
554
            still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already
555
            for old_child_path in sorted(still_to_be_renamed):
556
                new_child_path = must_be_renamed[old_child_path]
557
                if self.verbose:
558
                    self.note("implicitly renaming %s => %s" % (old_child_path,
559
                        new_child_path))
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
560
                file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"),
561
                    new_child_path.encode("utf-8")))
0.64.237 by Ian Clatworthy
implicitly rename children on export when directory renamed
562
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
563
        # Record remaining deletes
564
        for path, id_, kind in deletes:
565
            if path not in deleted_paths:
566
                continue
0.106.2 by Harry Hirsch
Don't emit directory info for renames operations when using plain format
567
            if kind == 'directory' and self.plain_format:
568
                continue
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
569
            #path = self._adjust_path_for_renames(path, renamed, revision_id)
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
570
            file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8")))
0.64.178 by Ian Clatworthy
improve fast-export's handling of rename+delete combinations
571
        return file_cmds, modifies, renamed
572
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
573
    def _adjust_path_for_renames(self, path, renamed, revision_id):
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
574
        # If a previous rename is found, we should adjust the path
575
        for old, new in renamed:
576
            if path == old:
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
577
                self.note("Changing path %s given rename to %s in revision %s"
578
                    % (path, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
579
                path = new
580
            elif path.startswith(old + '/'):
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
581
                self.note(
582
                    "Adjusting path %s given rename of %s to %s in revision %s"
583
                    % (path, old, new, revision_id))
0.64.174 by Ian Clatworthy
fix rename adjustment & kind change logic in fast-export
584
                path = path.replace(old + "/", new + "/")
585
        return path
586
0.68.1 by Pieter de Bie
Classify bzr-fast-export
587
    def emit_tags(self):
        """Print a reset command for every tag of the branch.

        A tag whose revision has no entry in self.revid_to_mark is
        reported with a warning and skipped. In plain format, a tag
        whose ref fails check_ref_format() is either rewritten with
        sanitize_ref_name_for_git() (when self.rewrite_tags is set) or
        skipped, with a warning in both cases.
        """
        for tag, revid in self.branch.tags.get_tag_dict().items():
            try:
                mark = self.revid_to_mark[revid]
            except KeyError:
                self.warning('not creating tag %r pointing to non-existent '
                    'revision %s' % (tag, revid))
                continue

            git_ref = 'refs/tags/%s' % tag.encode("utf-8")
            if self.plain_format and not check_ref_format(git_ref):
                if not self.rewrite_tags:
                    self.warning('not creating tag %r as its name would not be '
                                 'valid in git.', git_ref)
                    continue
                new_ref = sanitize_ref_name_for_git(git_ref)
                self.warning('tag %r is exported as %r to be valid in git.',
                             git_ref, new_ref)
                git_ref = new_ref
            self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark)))
0.68.1 by Pieter de Bie
Classify bzr-fast-export
607
0.79.9 by Ian Clatworthy
fix branch of first commit to not be refs/heads/tmp
608
    def _next_tmp_branch_name(self):
0.79.6 by Ian Clatworthy
refactor bzr_exporter to use Command objects
609
        """Return a unique branch name. The name will start with "tmp"."""
0.64.57 by Ian Clatworthy
integrate dato's bzr-fast-export
610
        prefix = 'tmp'
0.68.1 by Pieter de Bie
Classify bzr-fast-export
611
        if prefix not in self.branch_names:
612
            self.branch_names[prefix] = 0
613
        else:
614
            self.branch_names[prefix] += 1
615
            prefix = '%s.%d' % (prefix, self.branch_names[prefix])
616
        return prefix