/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
22
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.51 by Ian Clatworthy
disable autopacking
28
from bzrlib.repofmt import pack_repo
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
29
from bzrlib.trace import note, mutter
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
30
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
31
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
32
    branch_updater,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
33
    bzr_commit_handler,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
34
    cache_manager,
0.102.13 by Ian Clatworthy
Fix feature checking
35
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
36
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
37
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
38
    idmapfile,
0.78.5 by Ian Clatworthy
move import/export of marks into a module
39
    marks_file,
0.64.5 by Ian Clatworthy
first cut at generic processing method
40
    processor,
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
41
    revision_store,
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
43
44
0.64.41 by Ian Clatworthy
update multiple working trees if requested
45
# Number of commits between automatic progress reports
_DEFAULT_AUTO_PROGRESS = 1000

# Number of commits between automatic checkpoints
_DEFAULT_AUTO_CHECKPOINT = 10000

# Number of checkpoints between automatic repository packs
_DEFAULT_AUTO_PACK = 4

# Number of inventories to keep cached. The CHK variant is the default
# used for repository formats that support CHK inventories.
_DEFAULT_INV_CACHE_SIZE = 1
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
57
0.64.41 by Ian Clatworthy
update multiple working trees if requested
58
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
59
class GenericProcessor(processor.ImportProcessor):
60
    """An import processor that handles basic imports.
61
62
    Current features supported:
63
0.64.16 by Ian Clatworthy
safe processing tweaks
64
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
65
    * files and symlinks commits are supported
66
    * checkpoints automatically happen at a configurable frequency
67
      over and above the stream requested checkpoints
68
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
69
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
70
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
71
    At checkpoints and on completion, the commit-id -> revision-id map is
72
    saved to a file called 'fastimport-id-map'. If the import crashes
73
    or is interrupted, it can be started again and this file will be
74
    used to skip over already loaded revisions. The format of each line
75
    is "commit-id revision-id" so commit-ids cannot include spaces.
76
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
77
    Here are the supported parameters:
78
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
79
    * info - name of a hints file holding the analysis generated
80
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
81
      importing large repositories, this parameter is needed so
82
      that the importer knows what blobs to intelligently cache.
83
0.64.41 by Ian Clatworthy
update multiple working trees if requested
84
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
85
      By default, the importer updates the repository
86
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
87
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
88
0.64.170 by Ian Clatworthy
add autopack option to fast-import
89
    * count - only import this many commits then exit. If not set
90
      or negative, all commits are imported.
91
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
92
    * checkpoint - automatically checkpoint every n commits over and
93
      above any checkpoints contained in the import stream.
94
      The default is 10000.
95
0.64.170 by Ian Clatworthy
add autopack option to fast-import
96
    * autopack - pack every n checkpoints. The default is 4.
97
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
98
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
99
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
100
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
101
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
102
103
    * import-marks - name of file to read to load mark information from
104
105
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
106
    """
107
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
108
    known_params = [
109
        'info',
110
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
111
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
112
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
113
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
114
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
115
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
116
        'import-marks',
117
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
118
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
119
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
120
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
                 prune_empty_dirs=True):
        """Create the processor.

        :param bzrdir: passed through to ImportProcessor.__init__
        :param params: passed through to ImportProcessor.__init__
        :param verbose: passed through to ImportProcessor.__init__
        :param outf: accepted for interface compatibility; not used here
        :param prune_empty_dirs: stored as self.prune_empty_dirs
        """
        base_init = processor.ImportProcessor.__init__
        base_init(self, bzrdir, params, verbose)
        self.prune_empty_dirs = prune_empty_dirs
124
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
125
    def pre_process(self):
        """Prepare caches, id maps and the repository before importing.

        Loads the parameters and info file, creates the cache manager,
        restores the already-imported revision ids (from a marks file when
        'import-marks' is given, otherwise from the id-map), disables
        autopacking on pack repositories and opens the write group that
        the import commits into.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        if not self.total_commits:
            self.note("Starting import ...")
        else:
            self.note("Starting import of %d commits ..." %
                (self.total_commits,))
        self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: resume from the id-map, skipping what is
            # already loaded.
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            mark_info = marks_file.import_marks(marks_path)
            if mark_info is not None:
                self.cache_mgr.revision_ids = mark_info[0]
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            packs = self.repo._pack_collection
            self._original_max_pack_count = packs._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                # Always allow one more pack than there are revisions
                return total_revisions + 1

            packs._max_pack_count = _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from bzrlib import groupcompress
        except ImportError:
            pass
        else:
            groupcompress._FAST = True

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
180
181
    def _load_info_and_params(self):
        """Work out the import settings from self.params and the info file.

        Sets these attributes: _mode, _experimental, id_map_path, info,
        supports_chk, commit_handler_factory, progress_every,
        checkpoint_every, checkpoint_count, autopack_every,
        inventory_cache_size, max_commits and total_commits.
        """
        # Keep the mode as the string supplied by the caller. Coercing it
        # to a bool would make every comparison against a mode name below
        # false, so 'experimental' and 'classic' could never be selected.
        self._mode = self.params.get('mode') or 'default'
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer commit count
            # whichever division semantics are in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache; -1 means "not set"
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
252
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
253
    def _revision_store_factory(self):
        """Pick the RevisionStore implementation the repository supports.

        Repositories exposing a 'revisions' attribute get RevisionStore2.
        Others get RevisionStore1, or ImportRevisionStore1 when running
        in experimental mode.
        """
        if hasattr(self.repo, 'revisions'):
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def fulltext_when(count):
            # Store a full-text inventory for the final commit (when the
            # total is known) and otherwise every 200 revisions.
            total = self.total_commits
            is_last = total is not None and count == total
            fulltext = is_last or count % 200 == 0
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=fulltext_when)
276
0.64.27 by Ian Clatworthy
1st cut at performance tuning
277
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        Whatever is raised is re-raised after any open write group on
        the repository has been aborted.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately catches everything so the write group never
            # stays open; the original exception is re-raised.
            repo = self.repo
            write_group_open = repo is not None and repo.is_in_write_group()
            if write_group_open:
                repo.abort_write_group()
            raise
285
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
286
    def post_process(self):
        """Finish the import: commit, update branches and trees, then pack.

        Commits the current write group, saves the id-map and, when
        'export-marks' is given, the marks file. If no ref was seen there
        is nothing further to refresh and the method returns early.
        Otherwise the branches and the requested working trees are
        updated, the repository is packed if autopacking was disabled
        earlier, and the statistics are reported.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            marks_file.export_marks(self.params.get("export-marks"),
                self.cache_mgr.revision_ids)

        if self.cache_mgr.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                # Other branches still need a manual 'bzr update'
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for other branches, "
                "use 'bzr update' inside that branch.")
357
358
    def _update_working_trees(self, trees):
359
        if self.verbose:
360
            reporter = delta._ChangeReporter()
361
        else:
362
            reporter = None
363
        for wt in trees:
364
            self.note("Updating the working tree for %s ...", wt.basedir)
365
            wt.update(reporter)
366
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
367
0.64.167 by Ian Clatworthy
incremental packing for chk formats
368
    def _pack_repository(self, final=True):
0.64.162 by Ian Clatworthy
always repack the repository on completion
369
        # Before packing, free whatever memory we can and ensure
370
        # that groupcompress is configured to optimise disk space
371
        import gc
0.64.167 by Ian Clatworthy
incremental packing for chk formats
372
        if final:
373
            try:
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
374
                from bzrlib import groupcompress
0.64.167 by Ian Clatworthy
incremental packing for chk formats
375
            except ImportError:
376
                pass
377
            else:
378
                groupcompress._FAST = False
0.64.162 by Ian Clatworthy
always repack the repository on completion
379
        gc.collect()
380
        self.note("Packing repository ...")
381
        self.repo.pack()
382
383
        # To be conservative, packing puts the old packs and
384
        # indices in obsolete_packs. We err on the side of
385
        # optimism and clear out that directory to save space.
386
        self.note("Removing obsolete packs ...")
387
        # TODO: Use a public API for this once one exists
388
        repo_transport = self.repo._pack_collection.transport
389
        repo_transport.clone('obsolete_packs').delete_multi(
390
            repo_transport.list_dir('obsolete_packs'))
391
0.64.167 by Ian Clatworthy
incremental packing for chk formats
392
        # If we're not done, free whatever memory we can
393
        if not final:
394
            gc.collect()
395
0.64.41 by Ian Clatworthy
update multiple working trees if requested
396
    def _get_working_trees(self, branches):
397
        """Get the working trees for branches in the repository."""
398
        result = []
399
        wt_expected = self.repo.make_working_trees()
400
        for br in branches:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
401
            if br is None:
402
                continue
403
            elif br == self.branch:
404
                if self.working_tree:
405
                    result.append(self.working_tree)
0.64.41 by Ian Clatworthy
update multiple working trees if requested
406
            elif wt_expected:
407
                try:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
408
                    result.append(br.bzrdir.open_workingtree())
0.64.41 by Ian Clatworthy
update multiple working trees if requested
409
                except errors.NoWorkingTree:
410
                    self.warning("No working tree for branch %s", br)
411
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
412
413
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision count excludes the commits skipped because they were
        already loaded; the elapsed time is measured from pre_process.
        """
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        plural = helpers.single_plural
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, plural(revisions, "revision", "revisions"),
            branches, plural(branches, "branch", "branches"),
            trees, plural(trees, "tree", "trees"),
            time_required)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
423
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
424
    def _init_id_map(self):
        """Load the id-map and sanity-check it against the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        revision_ids, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = revision_ids
        existing_count = len(self.repo.all_revision_ids())
        if known > existing_count:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
438
439
    def _save_id_map(self):
        """Write the complete id-map out to self.id_map_path."""
        # The full map is rewritten on each call. Should that become a
        # bottleneck, appending only the new entries is an option.
        revision_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, revision_ids)
444
0.64.5 by Ian Clatworthy
first cut at generic processing method
445
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob's data is stored in the cache manager, keyed by the
        command's id when the blob carries a mark and by a SHA of the
        data otherwise.
        """
        data = cmd.data
        if cmd.mark is None:
            # NOTE(review): sha_strings is handed the data directly;
            # presumably data is a single string - confirm whether a
            # one-shot string hash was intended here.
            key = osutils.sha_strings(data)
        else:
            key = cmd.id
        self.cache_mgr.store_blob(key, data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
452
0.64.170 by Ian Clatworthy
add autopack option to fast-import
453
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        The current write group is committed, the id-map is saved and a
        fresh write group is started. Passing cmd=None marks an automatic
        (implicit) checkpoint; only those are counted when deciding
        whether to pack the repository.
        """
        # Flush what has been imported so far before opening a new group
        self.repo.commit_write_group()
        self._save_id_map()
        implicit = cmd is None
        if implicit:
            # Explicit checkpoints do not count towards auto-packing
            count = self.checkpoint_count + 1
            self.checkpoint_count = count
            if not count % self.autopack_every:
                self._pack_repository(final=False)
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
464
465
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While restarting an interrupted import, the first skip_total
        commits are not re-imported: heads are tracked, the commit-id is
        checked against the id-map, the file commands are consumed and
        any tag implied by the ref is set. Every other commit is handed
        to a handler built by self.commit_handler_factory, recorded in
        the id-map and reported. Afterwards the import is either flagged
        as finished (max_commits reached) or an automatic checkpoint is
        triggered every checkpoint_every commits.

        :param cmd: the CommitCommand to process
        :raises BadRestart: if a skipped commit is missing from the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.track_heads(cmd)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            if cmd.ref.startswith('refs/tags/'):
                tag_name = cmd.ref[len('refs/tags/'):]
                self._set_tag(tag_name, cmd.id)
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # The return value is not needed here; the call is kept for
            # whatever head-tracking state it updates.
            self.cache_mgr.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose,
            prune_empty_dirs=self.prune_empty_dirs)
        try:
            handler.process()
        except:
            # Deliberately catches everything (including interrupts) so
            # the triggering commit is always reported, then re-raises.
            # The single-argument parenthesised form prints the same text
            # whether print is a statement or a function.
            print("ABORT: exception occurred processing commit %s" % (cmd.id))
            raise
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # A commit made directly on a tag ref also defines that tag
        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            self._set_tag(tag_name, cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            # None marks the checkpoint as an automatic one
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
511
0.64.25 by Ian Clatworthy
slightly better progress reporting
512
    def report_progress(self, details=''):
        """Note the commit count and import rate every progress_every commits.

        Nothing is reported unless the revision count is a multiple of
        self.progress_every. The rate is computed over the revisions
        added in this run (the revision count less skip_total) and the
        time elapsed since self._start_time.

        :param details: extra text appended to the progress message
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        if minutes > 0:
            revisions_added = self._revision_count - self.skip_total
            rate = revisions_added * 1.0 / minutes
            # Drop the decimal once the rate is large enough not to need it
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # No measurable time has elapsed (coarse clock) or the clock
            # stepped backwards: a rate would be a division by zero or a
            # meaningless negative number, so leave it out.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
526
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
527
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # A progress bar is a possible alternative to plain notes
        message = "progress %s" % (cmd.message,)
        self.note(message)
0.64.5 by Ian Clatworthy
first cut at generic processing method
531
532
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a refs/tags/ ref defines a lightweight tag when it has
        a from clause; without one it is ignored, with a warning in
        verbose mode only. A reset of any other ref with a from clause
        updates the heads tracked for that ref.
        """
        tag_prefix = 'refs/tags/'
        ref = cmd.ref
        source = cmd.from_
        if not ref.startswith(tag_prefix):
            if source is not None:
                self.cache_mgr.track_heads_for_ref(ref, source)
            return

        tag_name = ref[len(tag_prefix):]
        if source is None:
            # Nothing to point the tag at - only mention it when verbose
            if self.verbose:
                self.warning("ignoring reset refs/tags/%s - no from clause"
                    % tag_name)
            return
        self._set_tag(tag_name, source)
0.64.5 by Ian Clatworthy
first cut at generic processing method
545
546
    def tag_handler(self, cmd):
        """Process a TagCommand.

        The tag is defined from the command's from clause; a tag without
        one is ignored with a warning.
        """
        source = cmd.from_
        if source is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, source)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
552
553
    def _set_tag(self, name, from_):
        """Record a tag for the revision that an import reference maps to.

        :param name: the tag name; decoded as UTF-8, with undecodable
            bytes replaced
        :param from_: the import 'from' reference, looked up in
            self.cache_mgr.revision_ids
        """
        tag_name = name.decode('utf-8', 'replace')
        revision_id = self.cache_mgr.revision_ids[from_]
        self.tags[tag_name] = revision_id
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
558
559
    def feature_handler(self, cmd):
560
        """Process a FeatureCommand."""
0.102.11 by Ian Clatworthy
Validate features are known before importing
561
        feature = cmd.feature_name
562
        if feature not in commands.FEATURE_NAMES:
563
            raise plugin_errors.UnknownFeature(feature)