1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
"""Fastimport/fastexport commands."""
18
from __future__ import absolute_import
20
from ... import controldir
21
from ...commands import Command
22
from ...option import Option, ListOption, RegistryOption
30
def _run(source, processor_factory, verbose=False, user_map=None, **kwargs):
    """Create and run a processor.

    :param source: a filename or '-' for standard input. If the
      filename ends in .gz, it will be opened as a gzip file and
      the stream will be implicitly uncompressed
    :param processor_factory: a callable for creating a processor
    :param verbose: forwarded to both the processor factory and the
      stream parser
    :param user_map: if not None, the file containing the user map.
    :param kwargs: extra keyword arguments handed to processor_factory
    :return: whatever the processor's process() call returns
    :raises BzrCommandError: if the parser reports a ParsingError
    """
    # Imported lazily so the module can be loaded without python-fastimport.
    from fastimport.errors import ParsingError
    from ...errors import BzrCommandError
    from fastimport import parser
    stream = _get_source_stream(source)
    user_mapper = _get_user_mapper(user_map)
    proc = processor_factory(verbose=verbose, **kwargs)
    p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
    try:
        return proc.process(p.iter_commands)
    except ParsingError as e:
        # Surface parse failures as a user-level command error that
        # carries the offending line number.
        raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
52
def _get_source_stream(source):
    """Open the fast-import stream named by ``source`` for reading.

    :param source: '-' or None selects standard input; a name ending
      in '.gz' is opened through gzip; any other name is opened as an
      ordinary file in binary mode.
    :return: the opened stream
    """
    if source == '-' or source is None:
        import sys
        # NOTE(review): 'helpers' is expected to be bound at module level;
        # its import is not visible in this part of the file - confirm.
        stream = helpers.binary_stream(sys.stdin)
    elif source.endswith('.gz'):
        import gzip
        stream = gzip.open(source, "rb")
    else:
        stream = open(source, "rb")
    return stream
64
def _get_user_mapper(filename):
    """Build a UserMapper from the lines of a user-map file.

    :param filename: path of the user-map file, or None when no user
      map was requested
    :return: a user_mapper.UserMapper built from the file's lines, or
      None if filename is None
    """
    if filename is None:
        # No map requested: callers pass None straight through to the parser.
        return None
    from . import user_mapper
    # The with-block guarantees the file is closed even if reading fails.
    with open(filename) as f:
        lines = f.readlines()
    return user_mapper.UserMapper(lines)
74
class cmd_fast_import(Command):
    """Backend for fast Bazaar data importers.

    This command reads a mixed command/data stream and creates
    branches in a Bazaar repository accordingly. The preferred
    recipe is::

      bzr fast-import project.fi project.bzr

    Numerous commands are provided for generating a fast-import file
    to use as input.
    To specify standard input as the input stream, use a
    source name of '-' (instead of project.fi). If the source name
    ends in '.gz', it is assumed to be compressed in gzip format.

    project.bzr will be created if it doesn't exist. If it exists
    already, it should be empty or be an existing Bazaar repository
    or branch. If not specified, the current directory is assumed.

    fast-import will intelligently select the format to use when
    creating a repository or branch. If you are running Bazaar 1.17
    up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
    Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
    is used. If you wish to specify a custom format, use the `--format`
    option.

     To maintain backwards compatibility, fast-import lets you
     create the target repository or standalone branch yourself.
     It is recommended though that you let fast-import create
     these for you instead.

    :Branch mapping rules:

     Git reference names are mapped to Bazaar branch names as follows:

     * refs/heads/foo is mapped to foo
     * refs/remotes/origin/foo is mapped to foo.remote
     * refs/tags/foo is mapped to foo.tag
     * */master is mapped to trunk, trunk.remote, etc.
     * */trunk is mapped to git-trunk, git-trunk.remote, etc.

    :Branch creation rules:

     When a shared repository is created or found at the destination,
     branches are created inside it. In the simple case of a single
     branch (refs/heads/master) inside the input file, the branch is
     project.bzr/trunk.

     When a standalone branch is found at the destination, the trunk
     is imported there and warnings are output about any other branches
     found in the input file.

     When a branch in a shared repository is found at the destination,
     that branch is made the trunk and other branches, if any, are
     created in sister directories.

    :Working tree updates:

     The working tree is generated for the trunk branch. If multiple
     branches are created, a message is output on completion explaining
     how to create the working trees for other branches.

    :Custom exporters:

     The fast-export-from-xxx commands typically call more advanced
     xxx-fast-export scripts. You are welcome to use the advanced
     scripts if you prefer.

     If you wish to write a custom exporter for your project, see
     http://bazaar-vcs.org/BzrFastImport for the detailed protocol
     specification. In many cases, exporters can be written quite
     quickly using whatever scripting/programming language you like.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids while
     importing by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

      maria => maria <maria@example.com>
      bill => William Jones <bill@example.com>

     User mapping is supported by both the fast-import and
     fast-import-filter commands.

    :Blob tracking:

     As some exporters (like git-fast-export) reuse blob data across
     commits, fast-import makes two passes over the input file by
     default. In the first pass, it collects data about what blobs are
     used when, along with some other statistics (e.g. total number of
     commits). In the second pass, it generates the repository and
     branches.

     The initial pass isn't done if the --info option is used
     to explicitly pass in information about the input stream.
     It also isn't done if the source is standard input. In the
     latter case, memory consumption may be higher than otherwise
     because some blobs may be kept in memory longer than necessary.

    :Restarting an import:

     At checkpoints and on completion, the commit-id -> revision-id
     map is saved to a file called 'fastimport-id-map' in the control
     directory for the repository (e.g. .bzr/repository). If the import
     is interrupted or unexpectedly crashes, it can be started again
     and this file will be used to skip over already loaded revisions.
     As long as subsequent exports from the original source begin
     with exactly the same revisions, you can use this feature to
     maintain a mirror of a repository managed by a foreign tool.
     If and when Bazaar is used to manage the repository, this file
     can be safely deleted.

    :Examples:

     Import a Subversion repository into Bazaar::

       svn-fast-export /svn/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a CVS repository into Bazaar::

       cvs2git /cvs/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a Git repository into Bazaar::

       cd /git/repo/path
       git fast-export --all > project.fi
       bzr fast-import project.fi project.bzr

     Import a Mercurial repository into Bazaar::

       cd /hg/repo/path
       hg fast-export > project.fi
       bzr fast-import project.fi project.bzr

     Import a Darcs repository into Bazaar::

       cd /darcs/repo/path
       darcs-fast-export > project.fi
       bzr fast-import project.fi project.bzr
    """
    hidden = False
    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
    takes_args = ['source', 'destination?']
    takes_options = ['verbose',
                     Option('user-map', type=str,
                            help="Path to file containing a map of user-ids.",
                            ),
                     Option('info', type=str,
                            help="Path to file containing caching hints.",
                            ),
                     Option('trees',
                            help="Update all working trees, not just trunk's.",
                            ),
                     Option('count', type=int,
                            help="Import this many revisions then exit.",
                            ),
                     Option('checkpoint', type=int,
                            help="Checkpoint automatically every N revisions."
                                 " The default is 10000.",
                            ),
                     Option('autopack', type=int,
                            help="Pack every N checkpoints. The default is 4.",
                            ),
                     Option('inv-cache', type=int,
                            help="Number of inventories to cache.",
                            ),
                     RegistryOption.from_kwargs('mode',
                            'The import algorithm to use.',
                            title='Import Algorithm',
                            default='Use the preferred algorithm (inventory deltas).',
                            classic="Use the original algorithm (mutable inventories).",
                            experimental="Enable experimental features.",
                            value_switches=True, enum_switch=False,
                            ),
                     Option('import-marks', type=str,
                            help="Import marks from file."
                            ),
                     Option('export-marks', type=str,
                            help="Export marks to file."
                            ),
                     RegistryOption('format',
                            help='Specify a format for the created repository. See'
                                 ' "bzr help formats" for details.',
                            lazy_registry=('breezy.controldir', 'format_registry'),
                            converter=lambda name: controldir.format_registry.make_controldir(name),
                            value_switches=False, title='Repository format'),
                     ]

    def run(self, source, destination='.', verbose=False, info=None,
            trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
            mode=None, import_marks=None, export_marks=None, format=None,
            user_map=None):
        """Import the fast-import stream ``source`` into ``destination``.

        The keyword arguments mirror the entries in takes_options and are
        bundled into the ``params`` dictionary given to the processor.

        :return: the result of running the GenericProcessor via _run
        """
        # NOTE(review): one short statement directly after the signature
        # appears to be missing from this copy of the file - confirm
        # against the upstream source.
        from .processors import generic_processor
        from .helpers import (
            open_destination_directory,
            )
        control = open_destination_directory(destination, format=format)

        # If no information file was given and the source isn't stdin,
        # generate the information by reading the source file as a first pass
        if info is None and source != '-':
            info = self._generate_info(source)

        # Do the work
        if mode is None:
            # presumably the registry's 'default' entry applies when no
            # algorithm switch was given - TODO confirm
            mode = 'default'
        params = {
            'info': info,
            'trees': trees,
            'count': count,
            'checkpoint': checkpoint,
            'autopack': autopack,
            'inv-cache': inv_cache,
            'mode': mode,
            'import-marks': import_marks,
            'export-marks': export_marks,
            }
        return _run(source, generic_processor.GenericProcessor,
                    bzrdir=control, params=params, verbose=verbose,
                    user_map=user_map)

    def _generate_info(self, source):
        """Make a first pass over ``source`` and collect caching hints.

        :param source: name of the fast-import stream, opened with
          _get_source_stream
        :return: the verbose InfoProcessor output, split into lines
        :raises BzrCommandError: if the parser reports a ParsingError
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            # cStringIO only exists on Python 2.
            from io import StringIO
        from fastimport import parser
        from fastimport.errors import ParsingError
        from ...errors import BzrCommandError
        from .processors import info_processor
        stream = _get_source_stream(source)
        output = StringIO()
        try:
            proc = info_processor.InfoProcessor(verbose=True, outf=output)
            p = parser.ImportParser(stream)
            try:
                proc.process(p.iter_commands)
            except ParsingError as e:
                raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
            lines = output.getvalue().splitlines()
        finally:
            output.close()
            # _run() opens the source again for the real import, so this
            # first-pass handle is no longer needed.
            stream.close()
        return lines
340
class cmd_fast_import_filter(Command):
    """Filter a fast-import stream to include/exclude files & directories.

    This command is useful for splitting a subdirectory or bunch of
    files out from a project to create a new project complete with history
    for just those files. It can also be used to create a new project
    repository that removes all references to files that should not have
    been committed, e.g. security-related information (like passwords),
    commercially sensitive material, files with an incompatible license or
    large binary files like CD images.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :File/directory filtering:

     This is supported by the -i and -x options. Excludes take precedence
     over includes.

     When filtering out a subdirectory (or file), the new stream uses the
     subdirectory (or subdirectory containing the file) as the root. As
     fast-import doesn't know in advance whether a path is a file or
     directory in the stream, you need to specify a trailing '/' on
     directories passed to the `--includes option`. If multiple files or
     directories are given, the new root is the deepest common directory.

     Note: If a path has been renamed, take care to specify the *original*
     path name, not the final name that it ends up with.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids
     by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

      maria => maria <maria@example.com>
      bill => William Jones <bill@example.com>

     User mapping is supported by both the fast-import and
     fast-import-filter commands.

    :History rewriting:

     By default fast-import-filter does quite aggressive history rewriting.
     Empty commits (or commits which had all their content filtered out) will
     be removed, and so are the references to commits not included in the stream.

     Flag --dont-squash-empty-commits reverses this behavior and makes it possible to
     use fast-import-filter on incremental streams.

    :Examples:

     Create a new project from a library (note the trailing / on the
     directory name of the library)::

       front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
       bzr fast-import xxx.fi mylibrary.bzr
       (lib/xxx/foo is now foo)

     Create a new repository without a sensitive file::

       front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
       bzr fast-import clean.fi clean.bzr
    """
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source?']
    takes_options = ['verbose',
                     ListOption('include_paths', short_name='i', type=str,
                                help="Only include commits affecting these paths."
                                     " Directories should have a trailing /."
                                ),
                     ListOption('exclude_paths', short_name='x', type=str,
                                help="Exclude these paths from commits."
                                ),
                     Option('user-map', type=str,
                            help="Path to file containing a map of user-ids.",
                            ),
                     Option('dont-squash-empty-commits',
                            help="Preserve all commits and links between them"
                            ),
                     ]
    encoding_type = 'exact'

    def run(self, source=None, verbose=False, include_paths=None,
            exclude_paths=None, user_map=None, dont_squash_empty_commits=False):
        """Run a FilterProcessor over the stream named by ``source``.

        :raises BzrCommandError: if --dont-squash-empty-commits is
          requested but the installed FilterProcessor does not list
          'squash_empty_commits' in its known_params, or if the stream
          cannot be parsed
        :return: the result of the processor's process() call
        """
        from ...errors import BzrCommandError
        # NOTE(review): one short statement appears to be missing here in
        # this copy of the file - confirm against the upstream source.
        from fastimport.processors import filter_processor
        params = {
            'include_paths': include_paths,
            'exclude_paths': exclude_paths,
            }
        if ('squash_empty_commits' in
                filter_processor.FilterProcessor.known_params):
            params['squash_empty_commits'] = (not dont_squash_empty_commits)
        elif dont_squash_empty_commits:
            # The processor cannot honour the flag, so refuse rather than
            # silently squash.
            raise BzrCommandError("installed python-fastimport does not "
                "support not squashing empty commits. Please install "
                "a newer python-fastimport to use "
                "--dont-squash-empty-commits")

        # _run performs the same open/parse/process/error-translation
        # sequence this method used to spell out by hand.
        return _run(source, filter_processor.FilterProcessor,
                    verbose=verbose, user_map=user_map, params=params)
471
class cmd_fast_import_info(Command):
    """Output information about a fast-import stream.

    This command reads a fast-import stream and outputs
    statistics and interesting properties about what it finds.
    When run in verbose mode, the information is output as a
    configuration file that can be passed to fast-import to
    assist it in intelligently caching objects.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :Examples:

     Display statistics about the import stream produced by front-end::

      front-end | bzr fast-import-info -

     Create a hints file for running fast-import on a large repository::

       front-end | bzr fast-import-info -v - > front-end.cfg
    """
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source']
    takes_options = ['verbose']

    def run(self, source, verbose=False):
        """Run an InfoProcessor over the stream named by ``source``.

        :return: the result of the processor's process() call, via _run
        """
        # NOTE(review): one short statement appears to be missing here in
        # this copy of the file - confirm against the upstream source.
        from .processors import info_processor
        return _run(source, info_processor.InfoProcessor, verbose=verbose)
504
class cmd_fast_import_query(Command):
    """Query a fast-import stream displaying selected commands.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    To specify a commit to display, give its mark using the
    --commit-mark option. The commit will be displayed with
    file-commands included but with inline blobs hidden.

    To specify the commands to display, use the -C option one or
    more times. To specify just some fields for a command, use the
    syntax::

      -Cxxx=field1,field2

    By default, the nominated fields for the nominated commands
    are displayed tab separated. To see the information in
    a name:value format, use verbose mode.

    Note: Binary fields (e.g. data for blobs) are masked out
    so it is generally safe to view the output in a terminal.

    :Examples:

     Show the commit with mark 429::

      bzr fast-import-query xxx.fi -m429

     Show all the fields of the reset and tag commands::

      bzr fast-import-query xxx.fi -Creset -Ctag

     Show the mark and merge fields of the commit commands::

      bzr fast-import-query xxx.fi -Ccommit=mark,merge
    """
    hidden = True
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source']
    takes_options = ['verbose',
                     Option('commit-mark', short_name='m', type=str,
                            help="Mark of the commit to display."
                            ),
                     ListOption('commands', short_name='C', type=str,
                                help="Display fields for these commands."
                                ),
                     ]

    def run(self, source, verbose=False, commands=None, commit_mark=None):
        """Run a QueryProcessor over the stream named by ``source``.

        :param commands: the values collected from -C, converted into the
          processor's params by helpers.defines_to_dict
        :param commit_mark: mark of a single commit to display, if any
        :return: the result of the processor's process() call, via _run
        """
        # NOTE(review): one short statement appears to be missing here in
        # this copy of the file - confirm against the upstream source.
        from fastimport.processors import query_processor
        from . import helpers
        # defines_to_dict may yield nothing usable; fall back to an empty
        # dict so the mark can still be recorded.
        params = helpers.defines_to_dict(commands) or {}
        if commit_mark:
            params['commit-mark'] = commit_mark
        return _run(source, query_processor.QueryProcessor, params=params,
                    verbose=verbose)
564
class cmd_fast_export(Command):
    """Generate a fast-import stream from a Bazaar branch.

    This program generates a stream from a Bazaar branch in fast-import
    format used by tools such as bzr fast-import, git-fast-import and
    hg-fast-import.

    It takes two optional arguments: the source bzr branch to export and
    the destination file to write the fastimport stream to.

    If no source is specified, it will search for a branch in the
    current directory.

    If no destination is given or the destination is '-', standard output
    is used. Otherwise, the destination is the name of a file. If the
    destination ends in '.gz', the output will be compressed into gzip
    format.

    :Round-tripping:

     Recent versions of the fast-import specification support features
     that allow effective round-tripping most of the metadata in Bazaar
     branches. As such, fast-exporting a branch and fast-importing the data
     produced will create a new repository with roughly equivalent history, i.e.
     "bzr log -v -p --include-merges --forward" on the old branch and
     new branch should produce similar, if not identical, results.

     Be aware that the new repository may appear to have similar history
     but internally it is quite different with new revision-ids and
     file-ids assigned. As a consequence, the ability to easily merge
     with branches based on the old repository is lost. Depending on your
     reasons for producing a new repository, this may or may not be an
     issue.

    :Interoperability:

     fast-export can use the following "extended features" to
     produce a richer data stream:

     * *multiple-authors* - if a commit has multiple authors (as commonly
       occurs in pair-programming), all authors will be included in the
       output, not just the first author

     * *commit-properties* - custom metadata per commit that Bazaar stores
       in revision properties (e.g. branch-nick and bugs fixed by this
       change) will be included in the output.

     * *empty-directories* - directories, even the empty ones, will be
       included in the output.

     To disable these features and produce output acceptable to git 1.6,
     use the --plain option. To enable these features, use --no-plain.
     Currently, --plain is the default but that will change in the near
     future once the feature names and definitions are formally agreed
     to by the broader fast-import developer community.

     Git has stricter naming rules for tags and fast-export --plain
     will skip tags which can't be imported into git. To replace characters
     unsupported in git with an underscore instead, specify
     --rewrite-tag-names.

    :History truncation:

     It is sometimes convenient to simply truncate the revision history at a
     certain point. The --baseline option, to be used in conjunction with -r,
     emits a baseline commit containing the state of the entire source tree at
     the first requested revision. This allows a user to produce a tree
     identical to the original without munging multiple exports.

    :Examples:

     To produce data destined for import into Bazaar::

       bzr fast-export --no-plain my-bzr-branch my.fi.gz

     To produce data destined for Git 1.6::

       bzr fast-export --plain my-bzr-branch my.fi

     To import several unmerged but related branches into the same repository,
     use the --{export,import}-marks options, and specify a name for the git
     branch like this::

       bzr fast-export --export-marks=marks.bzr project.dev |
             GIT_DIR=project/.git git-fast-import --export-marks=marks.git

       bzr fast-export --import-marks=marks.bzr -b other project.other |
             GIT_DIR=project/.git git-fast-import --import-marks=marks.git

     If you get a "Missing space after source" error from git-fast-import,
     see the top of the commands.py module for a work-around.

     Since bzr uses per-branch tags and git/hg use per-repo tags, the
     way bzr fast-export presently emits tags (unconditional reset &
     new ref) may result in clashes when several different branches
     are imported into single git/hg repo. If this occurs, use the
     bzr fast-export option --no-tags during the export of one or more
     branches to avoid the issue.
    """
    hidden = False
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source?', 'destination?']
    takes_options = ['verbose', 'revision',
                     Option('git-branch', short_name='b', type=str,
                            help='Name of the git branch to create (default=master).'
                            ),
                     Option('checkpoint', type=int, argname='N',
                            help="Checkpoint every N revisions (default=10000)."
                            ),
                     Option('marks', type=str, argname='FILE',
                            help="Import marks from and export marks to file."
                            ),
                     Option('import-marks', type=str, argname='FILE',
                            help="Import marks from file."
                            ),
                     Option('export-marks', type=str, argname='FILE',
                            help="Export marks to file."
                            ),
                     Option('plain',
                            help="Exclude metadata to maximise interoperability."
                            ),
                     Option('rewrite-tag-names',
                            help="Replace characters invalid in git with '_'"
                                 " (plain mode only).",
                            ),
                     Option('baseline',
                            help="Export an 'absolute' baseline commit prior to"
                                 " the first relative commit",
                            ),
                     Option('no-tags',
                            help="Don't export tags"
                            ),
                     ]
    encoding_type = 'exact'

    def run(self, source=None, destination=None, verbose=False,
            git_branch="master", checkpoint=10000, marks=None,
            import_marks=None, export_marks=None, revision=None,
            plain=True, rewrite_tag_names=False, no_tags=False, baseline=False):
        """Export the branch at ``source`` as a fast-import stream.

        :param marks: if given, used as both the import-marks and the
          export-marks file, overriding the two separate options
        :return: the result of BzrFastExporter.run()
        """
        # NOTE(review): one short statement directly after the signature
        # appears to be missing from this copy of the file - confirm
        # against the upstream source.
        from ...branch import Branch
        from . import exporter as exporter_module

        if marks:
            import_marks = export_marks = marks

        # Open the source; with none given, look from the current directory.
        if source is None:
            source = "."
        branch = Branch.open_containing(source)[0]
        outf = exporter_module._get_output_stream(destination)
        fast_exporter = exporter_module.BzrFastExporter(branch,
            outf=outf, ref="refs/heads/%s" % git_branch, checkpoint=checkpoint,
            import_marks_file=import_marks, export_marks_file=export_marks,
            revision=revision, verbose=verbose, plain_format=plain,
            rewrite_tags=rewrite_tag_names, no_tags=no_tags, baseline=baseline)
        return fast_exporter.run()