# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
"""Fastimport/fastexport commands."""
18
from __future__ import absolute_import
20
from ... import controldir
21
from ...commands import Command
22
from ...option import Option, ListOption, RegistryOption
23
from ...sixish import (
34
def _run(source, processor_factory, verbose=False, user_map=None, **kwargs):
    """Create and run a processor.

    :param source: a filename or '-' for standard input. If the
      filename ends in .gz, it will be opened as a gzip file and
      the stream will be implicitly uncompressed
    :param processor_factory: a callable for creating a processor; it is
      called with ``verbose`` plus any extra keyword arguments
    :param verbose: forwarded to both the processor and the parser
    :param user_map: if not None, the file containing the user map.
    :param kwargs: extra keyword arguments forwarded to processor_factory
    :return: the value returned by the processor's ``process`` call
    :raises BzrCommandError: if the parser reports a ParsingError
    """
    # Imported lazily so the module can be loaded without python-fastimport.
    from fastimport.errors import ParsingError
    from ...errors import BzrCommandError
    from fastimport import parser
    stream = _get_source_stream(source)
    user_mapper = _get_user_mapper(user_map)
    proc = processor_factory(verbose=verbose, **kwargs)
    p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
    try:
        # The processor drives the parser by calling iter_commands itself.
        return proc.process(p.iter_commands)
    except ParsingError as e:
        # Surface parse failures as a user-facing command error that
        # carries the offending line number.
        raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
56
def _get_source_stream(source):
    """Open the fast-import stream named by ``source`` for reading bytes.

    :param source: a filename, or '-' / None for standard input. A name
      ending in '.gz' is opened through gzip so reads are uncompressed.
    :return: a file-like object; files are opened in "rb" mode and
      standard input is wrapped by ``helpers.binary_stream``
    """
    if source == '-' or source is None:
        # Function-local imports keep this helper self-contained; the
        # module's own import section is not fully visible here.
        import sys
        from . import helpers
        stream = helpers.binary_stream(sys.stdin)
    elif source.endswith('.gz'):
        import gzip
        stream = gzip.open(source, "rb")
    else:
        stream = open(source, "rb")
    return stream
68
def _get_user_mapper(filename):
    """Build a UserMapper from the user-map file, if one was given.

    :param filename: path of the user-map file, or None when no user
      map was requested
    :return: a ``user_mapper.UserMapper`` built from the file's lines, or
      None if ``filename`` is None
    """
    # NOTE(review): the statements between the import and the return were
    # missing from this listing. The None guard and the file read are
    # reconstructed from _run's contract ("if not None, the file containing
    # the user map") -- confirm against the upstream file.
    if filename is None:
        return None
    from . import user_mapper
    with open(filename) as f:
        lines = f.readlines()
    return user_mapper.UserMapper(lines)
78
class cmd_fast_import(Command):
79
"""Backend for fast Bazaar data importers.
81
This command reads a mixed command/data stream and creates
82
branches in a Bazaar repository accordingly. The preferred
85
bzr fast-import project.fi project.bzr
87
Numerous commands are provided for generating a fast-import file
89
To specify standard input as the input stream, use a
90
source name of '-' (instead of project.fi). If the source name
91
ends in '.gz', it is assumed to be compressed in gzip format.
93
project.bzr will be created if it doesn't exist. If it exists
94
already, it should be empty or be an existing Bazaar repository
95
or branch. If not specified, the current directory is assumed.
97
fast-import will intelligently select the format to use when
98
creating a repository or branch. If you are running Bazaar 1.17
99
up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
100
Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
101
is used. If you wish to specify a custom format, use the `--format`
106
To maintain backwards compatibility, fast-import lets you
107
create the target repository or standalone branch yourself.
108
It is recommended though that you let fast-import create
109
these for you instead.
111
:Branch mapping rules:
113
Git reference names are mapped to Bazaar branch names as follows:
115
* refs/heads/foo is mapped to foo
116
* refs/remotes/origin/foo is mapped to foo.remote
117
* refs/tags/foo is mapped to foo.tag
118
* */master is mapped to trunk, trunk.remote, etc.
119
* */trunk is mapped to git-trunk, git-trunk.remote, etc.
121
:Branch creation rules:
123
When a shared repository is created or found at the destination,
124
branches are created inside it. In the simple case of a single
125
branch (refs/heads/master) inside the input file, the branch is
128
When a standalone branch is found at the destination, the trunk
129
is imported there and warnings are output about any other branches
130
found in the input file.
132
When a branch in a shared repository is found at the destination,
133
that branch is made the trunk and other branches, if any, are
134
created in sister directories.
136
:Working tree updates:
138
The working tree is generated for the trunk branch. If multiple
139
branches are created, a message is output on completion explaining
140
how to create the working trees for other branches.
144
The fast-export-from-xxx commands typically call more advanced
145
xxx-fast-export scripts. You are welcome to use the advanced
146
scripts if you prefer.
148
If you wish to write a custom exporter for your project, see
149
http://bazaar-vcs.org/BzrFastImport for the detailed protocol
150
specification. In many cases, exporters can be written quite
151
quickly using whatever scripting/programming language you like.
155
Some source repositories store just the user name while Bazaar
156
prefers a full email address. You can adjust user-ids while
157
importing by using the --user-map option. The argument is a
158
text file with lines in the format::
162
Blank lines and lines beginning with # are ignored.
163
If old-id has the special value '@', then users without an
164
email address will get one created by using the matching new-id
165
as the domain, unless a more explicit address is given for them.
166
For example, given the user-map of::
169
bill = William Jones <bill@example.com>
171
then user-ids are mapped as follows::
173
maria => maria <maria@example.com>
174
bill => William Jones <bill@example.com>
178
User mapping is supported by both the fast-import and
179
fast-import-filter commands.
183
As some exporters (like git-fast-export) reuse blob data across
184
commits, fast-import makes two passes over the input file by
185
default. In the first pass, it collects data about what blobs are
186
used when, along with some other statistics (e.g. total number of
187
commits). In the second pass, it generates the repository and
192
The initial pass isn't done if the --info option is used
193
to explicitly pass in information about the input stream.
194
It also isn't done if the source is standard input. In the
195
latter case, memory consumption may be higher than otherwise
196
because some blobs may be kept in memory longer than necessary.
198
:Restarting an import:
200
At checkpoints and on completion, the commit-id -> revision-id
201
map is saved to a file called 'fastimport-id-map' in the control
202
directory for the repository (e.g. .bzr/repository). If the import
203
is interrupted or unexpectedly crashes, it can be started again
204
and this file will be used to skip over already loaded revisions.
205
As long as subsequent exports from the original source begin
206
with exactly the same revisions, you can use this feature to
207
maintain a mirror of a repository managed by a foreign tool.
208
If and when Bazaar is used to manage the repository, this file
209
can be safely deleted.
213
Import a Subversion repository into Bazaar::
215
svn-fast-export /svn/repo/path > project.fi
216
bzr fast-import project.fi project.bzr
218
Import a CVS repository into Bazaar::
220
cvs2git /cvs/repo/path > project.fi
221
bzr fast-import project.fi project.bzr
223
Import a Git repository into Bazaar::
226
git fast-export --all > project.fi
227
bzr fast-import project.fi project.bzr
229
Import a Mercurial repository into Bazaar::
232
hg fast-export > project.fi
233
bzr fast-import project.fi project.bzr
235
Import a Darcs repository into Bazaar::
238
darcs-fast-export > project.fi
239
bzr fast-import project.fi project.bzr
242
_see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
243
takes_args = ['source', 'destination?']
244
takes_options = ['verbose',
245
Option('user-map', type=text_type,
246
help="Path to file containing a map of user-ids.",
248
Option('info', type=text_type,
249
help="Path to file containing caching hints.",
252
help="Update all working trees, not just trunk's.",
254
Option('count', type=int,
255
help="Import this many revisions then exit.",
257
Option('checkpoint', type=int,
258
help="Checkpoint automatically every N revisions."
259
" The default is 10000.",
261
Option('autopack', type=int,
262
help="Pack every N checkpoints. The default is 4.",
264
Option('inv-cache', type=int,
265
help="Number of inventories to cache.",
267
RegistryOption.from_kwargs('mode',
268
'The import algorithm to use.',
269
title='Import Algorithm',
270
default='Use the preferred algorithm (inventory deltas).',
271
classic="Use the original algorithm (mutable inventories).",
272
experimental="Enable experimental features.",
273
value_switches=True, enum_switch=False,
275
Option('import-marks', type=text_type,
276
help="Import marks from file."
278
Option('export-marks', type=text_type,
279
help="Export marks to file."
281
RegistryOption('format',
282
help='Specify a format for the created repository. See'
283
' "bzr help formats" for details.',
284
lazy_registry=('breezy.controldir', 'format_registry'),
285
converter=lambda name: controldir.format_registry.make_controldir(name),
286
value_switches=False, title='Repository format'),
288
def run(self, source, destination='.', verbose=False, info=None,
289
trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
290
mode=None, import_marks=None, export_marks=None, format=None,
293
from .processors import generic_processor
294
from .helpers import (
295
open_destination_directory,
297
control = open_destination_directory(destination, format=format)
299
# If no information file was given and the source isn't stdin,
300
# generate the information by reading the source file as a first pass
301
if info is None and source != '-':
302
info = self._generate_info(source)
311
'checkpoint': checkpoint,
312
'autopack': autopack,
313
'inv-cache': inv_cache,
315
'import-marks': import_marks,
316
'export-marks': export_marks,
318
return _run(source, generic_processor.GenericProcessor,
319
bzrdir=control, params=params, verbose=verbose,
322
def _generate_info(self, source):
323
from ...sixish import StringIO
324
from fastimport import parser
325
from fastimport.errors import ParsingError
326
from ...errors import BzrCommandError
327
from .processors import info_processor
328
stream = _get_source_stream(source)
331
proc = info_processor.InfoProcessor(verbose=True, outf=output)
332
p = parser.ImportParser(stream)
334
return_code = proc.process(p.iter_commands)
335
except ParsingError as e:
336
raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
337
lines = output.getvalue().splitlines()
344
class cmd_fast_import_filter(Command):
345
"""Filter a fast-import stream to include/exclude files & directories.
347
This command is useful for splitting a subdirectory or bunch of
348
files out from a project to create a new project complete with history
349
for just those files. It can also be used to create a new project
350
repository that removes all references to files that should not have
351
been committed, e.g. security-related information (like passwords),
352
commercially sensitive material, files with an incompatible license or
353
large binary files like CD images.
355
To specify standard input as the input stream, use a source name
356
of '-'. If the source name ends in '.gz', it is assumed to be
357
compressed in gzip format.
359
:File/directory filtering:
361
This is supported by the -i and -x options. Excludes take precedence
364
When filtering out a subdirectory (or file), the new stream uses the
365
subdirectory (or subdirectory containing the file) as the root. As
366
fast-import doesn't know in advance whether a path is a file or
367
directory in the stream, you need to specify a trailing '/' on
368
directories passed to the `-i` option. If multiple files or
369
directories are given, the new root is the deepest common directory.
371
Note: If a path has been renamed, take care to specify the *original*
372
path name, not the final name that it ends up with.
376
Some source repositories store just the user name while Bazaar
377
prefers a full email address. You can adjust user-ids
378
by using the --user-map option. The argument is a
379
text file with lines in the format::
383
Blank lines and lines beginning with # are ignored.
384
If old-id has the special value '@', then users without an
385
email address will get one created by using the matching new-id
386
as the domain, unless a more explicit address is given for them.
387
For example, given the user-map of::
390
bill = William Jones <bill@example.com>
392
then user-ids are mapped as follows::
394
maria => maria <maria@example.com>
395
bill => William Jones <bill@example.com>
399
User mapping is supported by both the fast-import and
400
fast-import-filter commands.
404
By default fast-import-filter does quite aggressive history rewriting.
405
Empty commits (or commits which had all their content filtered out) will
406
be removed, and so are the references to commits not included in the stream.
408
Flag --dont-squash-empty-commits reverses this behavior and makes it possible to
409
use fast-import-filter on incremental streams.
413
Create a new project from a library (note the trailing / on the
414
directory name of the library)::
416
front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
417
bzr fast-import xxx.fi mylibrary.bzr
418
(lib/xxx/foo is now foo)
420
Create a new repository without a sensitive file::
422
front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
423
bzr fast-import clean.fi clean.bzr
426
_see_also = ['fast-import']
427
takes_args = ['source?']
428
takes_options = ['verbose',
429
ListOption('include_paths', short_name='i', type=text_type,
430
help="Only include commits affecting these paths."
431
" Directories should have a trailing /."
433
ListOption('exclude_paths', short_name='x', type=text_type,
434
help="Exclude these paths from commits."
436
Option('user-map', type=text_type,
437
help="Path to file containing a map of user-ids.",
439
Option('dont-squash-empty-commits',
440
help="Preserve all commits and links between them"
443
encoding_type = 'exact'
444
def run(self, source=None, verbose=False, include_paths=None,
445
exclude_paths=None, user_map=None, dont_squash_empty_commits=False):
446
from ...errors import BzrCommandError
448
from fastimport.processors import filter_processor
450
'include_paths': include_paths,
451
'exclude_paths': exclude_paths,
453
if ('squash_empty_commits' in
454
filter_processor.FilterProcessor.known_params):
455
params['squash_empty_commits'] = (not dont_squash_empty_commits)
457
if dont_squash_empty_commits:
458
raise BzrCommandError("installed python-fastimport does not "
459
"support not squashing empty commits. Please install "
460
" a newer python-fastimport to use "
461
"--dont-squash-empty-commits")
463
from fastimport.errors import ParsingError
464
from fastimport import parser
465
stream = _get_source_stream(source)
466
user_mapper = _get_user_mapper(user_map)
467
proc = filter_processor.FilterProcessor(params=params, verbose=verbose)
468
p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
470
return proc.process(p.iter_commands)
471
except ParsingError as e:
472
raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
475
class cmd_fast_import_info(Command):
476
"""Output information about a fast-import stream.
478
This command reads a fast-import stream and outputs
479
statistics and interesting properties about what it finds.
480
When run in verbose mode, the information is output as a
481
configuration file that can be passed to fast-import to
482
assist it in intelligently caching objects.
484
To specify standard input as the input stream, use a source name
485
of '-'. If the source name ends in '.gz', it is assumed to be
486
compressed in gzip format.
490
Display statistics about the import stream produced by front-end::
492
front-end | bzr fast-import-info -
494
Create a hints file for running fast-import on a large repository::
496
front-end | bzr fast-import-info -v - > front-end.cfg
499
_see_also = ['fast-import']
500
takes_args = ['source']
501
takes_options = ['verbose']
502
def run(self, source, verbose=False):
504
from .processors import info_processor
505
return _run(source, info_processor.InfoProcessor, verbose=verbose)
508
class cmd_fast_import_query(Command):
509
"""Query a fast-import stream displaying selected commands.
511
To specify standard input as the input stream, use a source name
512
of '-'. If the source name ends in '.gz', it is assumed to be
513
compressed in gzip format.
515
To specify a commit to display, give its mark using the
516
--commit-mark option. The commit will be displayed with
517
file-commands included but with inline blobs hidden.
519
To specify the commands to display, use the -C option one or
520
more times. To specify just some fields for a command, use the
525
By default, the nominated fields for the nominated commands
526
are displayed tab separated. To see the information in
527
a name:value format, use verbose mode.
529
Note: Binary fields (e.g. data for blobs) are masked out
530
so it is generally safe to view the output in a terminal.
534
Show the commit with mark 429::
536
bzr fast-import-query xxx.fi -m429
538
Show all the fields of the reset and tag commands::
540
bzr fast-import-query xxx.fi -Creset -Ctag
542
Show the mark and merge fields of the commit commands::
544
bzr fast-import-query xxx.fi -Ccommit=mark,merge
547
_see_also = ['fast-import', 'fast-import-filter']
548
takes_args = ['source']
549
takes_options = ['verbose',
550
Option('commit-mark', short_name='m', type=text_type,
551
help="Mark of the commit to display."
553
ListOption('commands', short_name='C', type=text_type,
554
help="Display fields for these commands."
557
def run(self, source, verbose=False, commands=None, commit_mark=None):
559
from fastimport.processors import query_processor
560
from . import helpers
561
params = helpers.defines_to_dict(commands) or {}
563
params['commit-mark'] = commit_mark
564
return _run(source, query_processor.QueryProcessor, params=params,
568
class cmd_fast_export(Command):
569
"""Generate a fast-import stream from a Bazaar branch.
571
This program generates a stream from a Bazaar branch in fast-import
572
format used by tools such as bzr fast-import, git-fast-import and
575
It takes two optional arguments: the source bzr branch to export and
576
the destination to write the fast-import stream to.
578
If no source is specified, it will search for a branch in the
581
If no destination is given or the destination is '-', standard output
582
is used. Otherwise, the destination is the name of a file. If the
583
destination ends in '.gz', the output will be compressed into gzip
588
Recent versions of the fast-import specification support features
589
that allow effective round-tripping of most of the metadata in Bazaar
590
branches. As such, fast-exporting a branch and fast-importing the data
591
produced will create a new repository with roughly equivalent history, i.e.
592
"bzr log -v -p --include-merges --forward" on the old branch and
593
new branch should produce similar, if not identical, results.
597
Be aware that the new repository may appear to have similar history
598
but internally it is quite different with new revision-ids and
599
file-ids assigned. As a consequence, the ability to easily merge
600
with branches based on the old repository is lost. Depending on your
601
reasons for producing a new repository, this may or may not be an
606
fast-export can use the following "extended features" to
607
produce a richer data stream:
609
* *multiple-authors* - if a commit has multiple authors (as commonly
610
occurs in pair-programming), all authors will be included in the
611
output, not just the first author
613
* *commit-properties* - custom metadata per commit that Bazaar stores
614
in revision properties (e.g. branch-nick and bugs fixed by this
615
change) will be included in the output.
617
* *empty-directories* - directories, even the empty ones, will be
618
included in the output.
620
To disable these features and produce output acceptable to git 1.6,
621
use the --plain option. To enable these features, use --no-plain.
622
Currently, --plain is the default but that will change in the near
623
future once the feature names and definitions are formally agreed
624
to by the broader fast-import developer community.
626
Git has stricter naming rules for tags and fast-export --plain
627
will skip tags which can't be imported into git. To replace characters
628
unsupported in git with an underscore instead, specify
633
It is sometimes convenient to simply truncate the revision history at a
634
certain point. The --baseline option, to be used in conjunction with -r,
635
emits a baseline commit containing the state of the entire source tree at
636
the first requested revision. This allows a user to produce a tree
637
identical to the original without munging multiple exports.
641
To produce data destined for import into Bazaar::
643
bzr fast-export --no-plain my-bzr-branch my.fi.gz
645
To produce data destined for Git 1.6::
647
bzr fast-export --plain my-bzr-branch my.fi
649
To import several unmerged but related branches into the same repository,
650
use the --{export,import}-marks options, and specify a name for the git
653
bzr fast-export --export-marks=marks.bzr project.dev |
654
GIT_DIR=project/.git git-fast-import --export-marks=marks.git
656
bzr fast-export --import-marks=marks.bzr -b other project.other |
657
GIT_DIR=project/.git git-fast-import --import-marks=marks.git
659
If you get a "Missing space after source" error from git-fast-import,
660
see the top of the commands.py module for a work-around.
662
Since bzr uses per-branch tags and git/hg use per-repo tags, the
663
way bzr fast-export presently emits tags (unconditional reset &
664
new ref) may result in clashes when several different branches
665
are imported into a single git/hg repo. If this occurs, use the
666
bzr fast-export option --no-tags during the export of one or more
667
branches to avoid the issue.
670
_see_also = ['fast-import', 'fast-import-filter']
671
takes_args = ['source?', 'destination?']
672
takes_options = ['verbose', 'revision',
673
Option('git-branch', short_name='b', type=text_type,
675
help='Name of the git branch to create (default=master).'
677
Option('checkpoint', type=int, argname='N',
678
help="Checkpoint every N revisions (default=10000)."
680
Option('marks', type=text_type, argname='FILE',
681
help="Import marks from and export marks to file."
683
Option('import-marks', type=text_type, argname='FILE',
684
help="Import marks from file."
686
Option('export-marks', type=text_type, argname='FILE',
687
help="Export marks to file."
690
help="Exclude metadata to maximise interoperability."
692
Option('rewrite-tag-names',
693
help="Replace characters invalid in git with '_'"
694
" (plain mode only).",
697
help="Export an 'absolute' baseline commit prior to"
698
"the first relative commit",
701
help="Don't export tags"
704
encoding_type = 'exact'
705
def run(self, source=None, destination=None, verbose=False,
706
git_branch="master", checkpoint=10000, marks=None,
707
import_marks=None, export_marks=None, revision=None,
708
plain=True, rewrite_tag_names=False, no_tags=False, baseline=False):
710
from ...branch import Branch
711
from . import exporter
714
import_marks = export_marks = marks
719
branch = Branch.open_containing(source)[0]
720
outf = exporter._get_output_stream(destination)
721
exporter = exporter.BzrFastExporter(branch,
722
outf=outf, ref="refs/heads/%s" % git_branch, checkpoint=checkpoint,
723
import_marks_file=import_marks, export_marks_file=export_marks,
724
revision=revision, verbose=verbose, plain_format=plain,
725
rewrite_tags=rewrite_tag_names, no_tags=no_tags, baseline=baseline)
726
return exporter.run()