/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
15
16
"""Import processor that supports all Bazaar repository formats."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
19
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
20
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
22
from .... import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
23
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
24
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
25
    errors,
26
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
27
    progress,
6846.3.1 by Jelmer Vernooij
Support '0' marker in fastimport plugin.
28
    revision as _mod_revision,
0.64.5 by Ian Clatworthy
first cut at generic processing method
29
    )
6670.4.5 by Jelmer Vernooij
Move breezy.repofmt contents to breezy.bzr.
30
from ....bzr.knitpack_repo import KnitPackRepository
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
31
from ....trace import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
32
    mutter,
33
    note,
34
    warning,
35
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
36
import configobj
37
from .. import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
38
    branch_updater,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
39
    cache_manager,
0.139.1 by Jelmer Vernooij
Import helper functions that have been removed from python-fastimport.
40
    helpers,
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
41
    idmapfile,
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
42
    marks_file,
43
    revision_store,
44
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
45
from fastimport import (
0.102.13 by Ian Clatworthy
Fix feature checking
46
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
47
    errors as plugin_errors,
0.64.5 by Ian Clatworthy
first cut at generic processing method
48
    processor,
49
    )
6929.13.2 by Jelmer Vernooij
Remove functionality moved to fastimport.
50
from fastimport.helpers import (
51
    invert_dictset,
52
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
53
54
0.64.41 by Ian Clatworthy
update multiple working trees if requested
55
# How many commits before automatically reporting progress
56
_DEFAULT_AUTO_PROGRESS = 1000
57
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
58
# How many commits before automatically checkpointing
59
_DEFAULT_AUTO_CHECKPOINT = 10000
60
0.64.170 by Ian Clatworthy
add autopack option to fast-import
61
# How many checkpoints before automatically packing
62
_DEFAULT_AUTO_PACK = 4
63
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
64
# How many inventories to cache
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
65
_DEFAULT_INV_CACHE_SIZE = 1
66
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
67
0.64.41 by Ian Clatworthy
update multiple working trees if requested
68
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
69
class GenericProcessor(processor.ImportProcessor):
70
    """An import processor that handles basic imports.
71
72
    Current features supported:
73
0.64.16 by Ian Clatworthy
safe processing tweaks
74
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
75
    * files and symlinks commits are supported
76
    * checkpoints automatically happen at a configurable frequency
77
      over and above the stream requested checkpoints
78
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
79
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
80
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
81
    At checkpoints and on completion, the commit-id -> revision-id map is
82
    saved to a file called 'fastimport-id-map'. If the import crashes
83
    or is interrupted, it can be started again and this file will be
84
    used to skip over already loaded revisions. The format of each line
85
    is "commit-id revision-id" so commit-ids cannot include spaces.
86
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
87
    Here are the supported parameters:
88
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
89
    * info - name of a hints file holding the analysis generated
90
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      importing large repositories, this parameter is needed so
92
      that the importer knows what blobs to intelligently cache.
93
0.64.41 by Ian Clatworthy
update multiple working trees if requested
94
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
95
      By default, the importer updates the repository
96
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
97
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
98
0.64.170 by Ian Clatworthy
add autopack option to fast-import
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
102
    * checkpoint - automatically checkpoint every n commits over and
103
      above any checkpoints contained in the import stream.
104
      The default is 10000.
105
0.64.170 by Ian Clatworthy
add autopack option to fast-import
106
    * autopack - pack every n checkpoints. The default is 4.
107
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
108
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
109
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
110
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
111
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
112
113
    * import-marks - name of file to read to load mark information from
114
115
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
116
    """
117
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
118
    known_params = [
119
        'info',
120
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
121
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
122
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
123
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
124
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
125
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
126
        'import-marks',
127
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
128
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
129
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
130
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
                 prune_empty_dirs=True):
        """Set up the processor for the given control directory.

        :param bzrdir: the control directory to import into; it is asked
            for its tree and branch, or failing that its repository
        :param params: passed through to ImportProcessor.__init__
        :param verbose: passed through to ImportProcessor.__init__
        :param outf: accepted for interface compatibility; not used here
        :param prune_empty_dirs: stored as ``self.prune_empty_dirs``
        """
        processor.ImportProcessor.__init__(self, params, verbose)
        self.controldir = bzrdir
        self.prune_empty_dirs = prune_empty_dirs
        try:
            # Might be inside a branch
            tree, branch = bzrdir._get_tree_branch()
            repo = branch.repository
        except errors.NotBranchError:
            # Must be inside a repository
            tree, branch = None, None
            repo = bzrdir.open_repository()
        self.working_tree = tree
        self.branch = branch
        self.repo = repo
0.64.196 by Ian Clatworthy
get tests passing again
144
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
145
    def pre_process(self):
        """Prepare state before any command from the stream is handled.

        Loads the parameters, creates the cache manager, restores marks
        (from an import-marks file or from the id-map), creates the
        revision store, adjusts the pack/groupcompress settings for the
        duration of the import and finally opens a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        if not self.total_commits:
            self.note("Starting import ...")
        else:
            self.note("Starting import of %d commits ..." %
                      (self.total_commits,))
        # The cache manager must exist before marks are loaded into it.
        self.cache_mgr = cache_manager.CacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        self.first_incremental_commit = marks_path is not None
        if self.first_incremental_commit:
            loaded_marks = marks_file.import_marks(marks_path)
            if loaded_marks is not None:
                self.cache_mgr.marks = loaded_marks
            self.skip_total = False
        else:
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note(
                    "Found %d commits already loaded - skipping over these ...",
                    self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if not isinstance(self.repo, KnitPackRepository):
            self._original_max_pack_count = None
        else:
            pack_collection = self.repo._pack_collection
            # Remember the original so post_process knows a repack is due.
            self._original_max_pack_count = pack_collection._max_pack_count

            def _one_more_than_revisions(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _one_more_than_revisions

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from .... import groupcompress
        except ImportError:
            pass
        else:
            groupcompress._FAST = True

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
200
201
    def _load_info_and_params(self):
        """Derive the import settings from ``self.params`` and the info file.

        Sets the mode flags, the id-map path, ``self.info``, the commit
        handler factory, the progress/checkpoint/autopack intervals, the
        inventory cache size and the ``max_commits``/``total_commits``
        counters.
        """
        from .. import bzr_commit_handler
        # Keep the mode as the *name* that was requested. Coercing it to a
        # bool made every comparison against a mode name below evaluate to
        # False, silently disabling the 'experimental' and 'classic' modes.
        self._mode = self.params.get('mode') or 'default'
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer on Python 3 too.
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache; -1 means "use the
        # default for this repository format".
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                    self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
273
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
274
    def _revision_store_factory(self):
        """Pick and build the RevisionStore variant suited to the repository.

        :return: RevisionStore2 when the repository has a ``revisions``
            attribute, RevisionStore1 when it does not and experimental
            mode is off, otherwise an ImportRevisionStore1.
        """
        repo = self.repo
        if hasattr(repo, 'revisions'):
            return revision_store.RevisionStore2(repo)
        if not self._experimental:
            return revision_store.RevisionStore1(repo)

        def decide_fulltext(count):
            # Full-text on the final commit (when the total is known) and
            # otherwise on every 200th revision.
            expected_total = self.total_commits
            is_last = expected_total is not None and count == expected_total
            store_fulltext = is_last or count % 200 == 0
            if store_fulltext:
                self.note("%d commits - storing inventory as full-text",
                          count)
            return store_fulltext

        return revision_store.ImportRevisionStore1(
            repo, self.inventory_cache_size, fulltext_when=decide_fulltext)
297
0.123.5 by Jelmer Vernooij
Fix typo, handle bzr-specific locking in GenericProcessor.
298
    def process(self, command_iter):
        """Import data into Bazaar by processing a stream of commands.

        A write lock is taken on the working tree if there is one, else on
        the branch, else on the repository, and released again afterwards.

        :param command_iter: an iterator providing commands
        """
        # Pick the outermost available object; it is the only one locked.
        lock_holder = None
        for candidate in (self.working_tree, self.branch, self.repo):
            if candidate is not None:
                lock_holder = candidate
                break
        if lock_holder is not None:
            lock_holder.lock_write()
        try:
            super(GenericProcessor, self)._process(command_iter)
        finally:
            # If an unhandled exception occurred, abort the write group
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            # Release the lock
            if lock_holder is not None:
                lock_holder.unlock()
322
0.64.27 by Ian Clatworthy
1st cut at performance tuning
323
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        Any exception raised by the base implementation propagates
        unchanged after an open write group (if any) has been aborted.
        """
        completed = False
        try:
            processor.ImportProcessor._process(self, command_iter)
            completed = True
        finally:
            # if anything went wrong, abort the write group if any
            if not completed:
                repo = self.repo
                if repo is not None and repo.is_in_write_group():
                    repo.abort_write_group()
331
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
332
    def post_process(self):
        """Finish the import once every command has been handled.

        Commits the write group, saves the id-map and (if requested) the
        marks, updates branches and working trees, optionally repacks the
        repository and reports statistics. Returns early, right after
        saving the maps, when the ref tracker recorded no last ref.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        export_marks_path = self.params.get("export-marks")
        if export_marks_path is not None:
            marks_file.export_marks(export_marks_path, self.cache_mgr.marks)

        reftracker = self.cache_mgr.reftracker
        if reftracker.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(
            self.repo, self.branch, self.cache_mgr,
            invert_dictset(reftracker.heads),
            reftracker.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                             "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                # Other branches, if any, still need a manual update.
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for other branches, "
                 "use 'bzr update' inside that branch.")
404
405
    def _update_working_trees(self, trees):
        """Call ``update`` on each given working tree, counting them.

        :param trees: the working trees to update
        """
        # Only report individual changes when running verbosely.
        change_reporter = delta._ChangeReporter() if self.verbose else None
        for tree in trees:
            self.note("Updating the working tree for %s ...", tree.basedir)
            tree.update(change_reporter)
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
414
0.64.167 by Ian Clatworthy
incremental packing for chk formats
415
    def _pack_repository(self, final=True):
        """Pack the repository and delete everything in obsolete_packs.

        :param final: when true, groupcompress._FAST is switched off
            before packing (if groupcompress can be imported); when
            false, garbage is collected again after packing.
        """
        # Before packing, free whatever memory we can and ensure
        # that groupcompress is configured to optimise disk space
        import gc
        if final:
            try:
                from .... import groupcompress
            except ImportError:
                groupcompress = None
            if groupcompress is not None:
                groupcompress._FAST = False
        gc.collect()
        self.note("Packing repository ...")
        self.repo.pack()

        # To be conservative, packing puts the old packs and
        # indices in obsolete_packs. We err on the side of
        # optimism and clear out that directory to save space.
        self.note("Removing obsolete packs ...")
        # TODO: Use a public API for this once one exists
        pack_transport = self.repo._pack_collection.transport
        obsolete_dir = pack_transport.clone('obsolete_packs')
        for entry in obsolete_dir.list_dir('.'):
            obsolete_dir.delete(entry)

        # If we're not done, free whatever memory we can
        if not final:
            gc.collect()
443
0.64.41 by Ian Clatworthy
update multiple working trees if requested
444
    def _get_working_trees(self, branches):
        """Collect the working trees belonging to the given branches.

        :param branches: branches to look at; None entries are skipped
        :return: a list of working trees, in the order found
        """
        found = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br is None:
                continue
            if br == self.branch:
                # Reuse the tree opened at construction time, if any.
                if self.working_tree:
                    found.append(self.working_tree)
                continue
            if not wt_expected:
                continue
            try:
                tree = br.controldir.open_workingtree()
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", br)
            else:
                found.append(tree)
        return found
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
460
461
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits counted in
        ``self.skip_total``; the elapsed time is measured from
        ``self._start_time``.
        """
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(
            revisions, "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
                  revisions, revision_word,
                  branches, branch_word,
                  trees, tree_word,
                  time_required)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
471
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
472
    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map (plus one if the '0'
            mark for the null revision was newly added)
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        loaded = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.marks, known = loaded
        if self.cache_mgr.add_mark('0', _mod_revision.NULL_REVISION):
            known += 1

        existing_count = len(self.repo.all_revision_ids())
        if known > existing_count:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
489
490
    def _save_id_map(self):
        """Save the id-map.

        Writes ``self.cache_mgr.marks`` to ``self.id_map_path``.
        """
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.marks)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
495
0.64.5 by Ian Clatworthy
first cut at generic processing method
496
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is stored in the cache manager, keyed by the
        command's id when it carries a mark, otherwise by a SHA of the
        data.

        :param cmd: the blob command; ``mark``, ``id`` and ``data`` are read
        """
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # Unmarked blob: derive a reference from the content itself.
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
503
0.64.170 by Ian Clatworthy
add autopack option to fast-import
504
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the checkpoint command, or None when the checkpoint was
            triggered automatically rather than by the input stream
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        # track the number of automatic checkpoints done
        if cmd is None:
            self.checkpoint_count += 1
            # Only automatic checkpoints count towards auto-packing.
            if self.checkpoint_count % self.autopack_every == 0:
                self._pack_repository(final=False)
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
515
516
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is not imported again: heads are tracked, the mark is checked
        against the loaded id-map, and any tag named by the ref is set.
        Otherwise the commit is handed to a handler built by
        ``commit_handler_factory``, its mark is recorded, progress is
        reported, and the import either finishes (``max_commits`` reached)
        or checkpoints automatically every ``checkpoint_every`` commits.

        :param cmd: the commit command; ``id`` and ``ref`` are read here
        :raises BadRestart: if a skipped commit's mark is not in the id-map
        """
        mark = cmd.id.lstrip(':')
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.reftracker.track_heads(cmd)
            # Check that we really do know about this commit-id
            # ('in' rather than dict.has_key(), which Python 3 removed).
            if mark not in self.cache_mgr.marks:
                raise plugin_errors.BadRestart(mark)
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            if cmd.ref.startswith('refs/tags/'):
                tag_name = cmd.ref[len('refs/tags/'):]
                self._set_tag(tag_name, cmd.id)
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # Called for its head-tracking side effect; the returned
            # value was never used.
            self.cache_mgr.reftracker.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose,
            prune_empty_dirs=self.prune_empty_dirs)
        try:
            handler.process()
        except:
            # Deliberately catch everything: we only report which commit
            # triggered the failure and then re-raise unchanged.
            print("ABORT: exception occurred processing commit %s" % (cmd.id))
            raise
        self.cache_mgr.add_mark(mark, handler.revision_id)
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            self._set_tag(tag_name, cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
560
0.64.25 by Ian Clatworthy
slightly better progress reporting
561
    def report_progress(self, details=''):
        """Output a progress note every ``progress_every`` commits.

        The note gives the commit count (with the total when
        ``total_commits`` is known), the import rate in commits per minute,
        and ``details``.

        :param details: extra text appended to the end of the message
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        revisions_added = self._revision_count - self.skip_total
        if minutes > 0:
            rate = revisions_added * 1.0 / minutes
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # No measurable (or negative) elapsed time, e.g. a coarse
            # clock: omit the rate instead of dividing by zero.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
575
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
576
    def progress_handler(self, cmd):
        """Process a ProgressCommand.

        :param cmd: the progress command; its ``message`` is echoed as a
            note when running in verbose mode
        """
        # Most progress messages embedded in streams are annoying.
        # Ignore them unless in verbose mode.
        if self.verbose:
            self.note("progress %s" % (cmd.message,))
0.64.5 by Ian Clatworthy
first cut at generic processing method
582
583
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a tag when it has a from
        clause and is otherwise ignored (with a warning in verbose mode).
        A reset of any other ref with a from clause updates the tracked
        heads for that ref.

        :param cmd: the reset command; ``ref`` and ``from_`` are read
        """
        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            if cmd.from_ is not None:
                self._set_tag(tag_name, cmd.from_)
            elif self.verbose:
                self.warning("ignoring reset refs/tags/%s - no from clause"
                    % tag_name)
            return

        if cmd.from_ is not None:
            self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
596
597
    def tag_handler(self, cmd):
        """Process a TagCommand.

        :param cmd: the tag command; a tag named ``cmd.id`` is defined when
            ``cmd.from_`` is given, otherwise a warning is issued and the
            tag is ignored
        """
        if cmd.from_ is not None:
            self._set_tag(cmd.id, cmd.from_)
        else:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
603
604
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
605
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
606
        bzr_tag_name = name.decode('utf-8', 'replace')
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
607
        bzr_rev_id = self.cache_mgr.lookup_committish(from_)
0.64.11 by Ian Clatworthy
tag support
608
        self.tags[bzr_tag_name] = bzr_rev_id
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
609
610
    def feature_handler(self, cmd):
        """Process a FeatureCommand.

        :param cmd: the feature command; its ``feature_name`` must be one of
            ``commands.FEATURE_NAMES``
        :raises UnknownFeature: if the named feature is not recognised
        """
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            raise plugin_errors.UnknownFeature(feature)
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
615
0.64.297 by Jelmer Vernooij
Fix typo.
616
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        :param msg: the message (format string) to log
        :param args: arguments passed through to ``mutter`` along with the
            timestamped message
        """
        # Here 'debug' resolves to the imported debug module, not this method.
        if "fast-import" in debug.debug_flags:
            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
            mutter(msg, *args)
621
622
    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message (format string) to output
        :param args: arguments passed through to the module-level ``note``
        """
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)
626
627
    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message (format string) to output
        :param args: arguments passed through to the module-level
            ``warning``
        """
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)