/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
22
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.51 by Ian Clatworthy
disable autopacking
28
from bzrlib.repofmt import pack_repo
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
29
from bzrlib.trace import (
30
    mutter,
31
    note,
32
    warning,
33
    )
0.118.1 by Jelmer Vernooij
Cope with Debian's bzr using the system configobj, not shipping with its own.
34
try:
35
    import bzrlib.util.configobj.configobj as configobj
36
except ImportError:
37
    import configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
39
    branch_updater,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
40
    cache_manager,
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
41
    marks_file,
42
    revision_store,
43
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
44
from fastimport import (
0.102.13 by Ian Clatworthy
Fix feature checking
45
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
51
52
0.64.41 by Ian Clatworthy
update multiple working trees if requested
53
# How many commits before automatically reporting progress
54
_DEFAULT_AUTO_PROGRESS = 1000
55
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
56
# How many commits before automatically checkpointing
57
_DEFAULT_AUTO_CHECKPOINT = 10000
58
0.64.170 by Ian Clatworthy
add autopack option to fast-import
59
# How many checkpoints before automatically packing
60
_DEFAULT_AUTO_PACK = 4
61
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
62
# How many inventories to cache
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
63
_DEFAULT_INV_CACHE_SIZE = 1
64
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
65
0.64.41 by Ian Clatworthy
update multiple working trees if requested
66
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
67
class GenericProcessor(processor.ImportProcessor):
68
    """An import processor that handles basic imports.
69
70
    Current features supported:
71
0.64.16 by Ian Clatworthy
safe processing tweaks
72
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
73
    * files and symlinks commits are supported
74
    * checkpoints automatically happen at a configurable frequency
75
      over and above the stream requested checkpoints
76
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
77
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
78
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
79
    At checkpoints and on completion, the commit-id -> revision-id map is
80
    saved to a file called 'fastimport-id-map'. If the import crashes
81
    or is interrupted, it can be started again and this file will be
82
    used to skip over already loaded revisions. The format of each line
83
    is "commit-id revision-id" so commit-ids cannot include spaces.
84
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
85
    Here are the supported parameters:
86
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
87
    * info - name of a hints file holding the analysis generated
88
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
89
      importing large repositories, this parameter is needed so
90
      that the importer knows what blobs to intelligently cache.
91
0.64.41 by Ian Clatworthy
update multiple working trees if requested
92
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
93
      By default, the importer updates the repository
94
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
95
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
96
0.64.170 by Ian Clatworthy
add autopack option to fast-import
97
    * count - only import this many commits then exit. If not set
98
      or negative, all commits are imported.
99
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
100
    * checkpoint - automatically checkpoint every n commits over and
101
      above any checkpoints contained in the import stream.
102
      The default is 10000.
103
0.64.170 by Ian Clatworthy
add autopack option to fast-import
104
    * autopack - pack every n checkpoints. The default is 4.
105
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
106
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
107
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
108
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
109
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
110
111
    * import-marks - name of file to read to load mark information from
112
113
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
114
    """
115
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
116
    known_params = [
117
        'info',
118
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
119
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
120
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
121
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
122
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
123
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
124
        'import-marks',
125
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
126
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
127
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
128
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
            prune_empty_dirs=True):
        """Create a processor that imports into the given control directory.

        :param bzrdir: the control directory to import into. It may hold
            a branch (optionally with a working tree) or just a repository.
        :param params: parameters, passed to the base ImportProcessor
        :param verbose: verbosity flag, passed to the base ImportProcessor
        :param outf: accepted but not used by this method
        :param prune_empty_dirs: stored as self.prune_empty_dirs
        """
        processor.ImportProcessor.__init__(self, params, verbose)
        self.prune_empty_dirs = prune_empty_dirs
        self.bzrdir = bzrdir
        try:
            # Might be inside a branch
            (self.working_tree, self.branch) = bzrdir._get_tree_branch()
            self.repo = self.branch.repository
        except errors.NotBranchError:
            # Must be inside a repository
            self.working_tree = None
            self.branch = None
            self.repo = bzrdir.open_repository()
0.64.196 by Ian Clatworthy
get tests passing again
142
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
143
    def pre_process(self):
        """Prepare for the import before any commands are processed.

        Records the start time, loads the info file and parameters, creates
        the cache manager, loads either the marks named by the
        'import-marks' parameter or the saved id-map, creates the revision
        store, disables autopacking on pack repositories, switches
        groupcompress to its fast algorithm and starts the first write
        group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        if self.total_commits:
            self.note("Starting import of %d commits ..." %
                (self.total_commits,))
        else:
            self.note("Starting import ...")
        self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        import_marks_path = self.params.get("import-marks")
        if import_marks_path is not None:
            # Incremental import driven by an explicit marks file
            mark_info = marks_file.import_marks(import_marks_path)
            if mark_info is not None:
                self.cache_mgr.revision_ids = mark_info[0]
            self.skip_total = False
            self.first_incremental_commit = True
        else:
            # Restart support: skip over whatever the id-map says is loaded
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from bzrlib import groupcompress
            groupcompress._FAST = True
        except ImportError:
            pass

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
198
199
    def _load_info_and_params(self):
        """Interpret self.params and the optional info file.

        Sets the following attributes: _mode, _experimental, id_map_path,
        info, supports_chk, commit_handler_factory, progress_every,
        checkpoint_every, checkpoint_count, autopack_every,
        inventory_cache_size, max_commits and total_commits.
        """
        from bzrlib.plugins.fastimport import bzr_commit_handler
        # Keep the mode as the string that was given. Coercing it to a
        # bool would make the comparisons against 'experimental' and
        # 'classic' below always fail.
        self._mode = self.params.get('mode', 'default')
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet). Floor division keeps the value an
        # integer regardless of the Python version.
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            max_commits = int(self.params['count'])
        except KeyError:
            max_commits = None
        else:
            if max_commits < 0:
                max_commits = None
        self.max_commits = max_commits
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
271
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
272
    def _revision_store_factory(self):
        """Make a RevisionStore based on what the repository supports.

        :return: a RevisionStore2 when the repository has a 'revisions'
            attribute; otherwise a RevisionStore1, or an
            ImportRevisionStore1 when the experimental mode is selected
        """
        if hasattr(self.repo, 'revisions'):
            # Repository offers the newer API
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def fulltext_when(count):
            """Decide whether commit number 'count' gets a full-text."""
            total = self.total_commits
            if total is not None and count == total:
                fulltext = True
            else:
                # Create an inventory fulltext every 200 revisions
                fulltext = count % 200 == 0
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=fulltext_when)
295
0.123.5 by Jelmer Vernooij
Fix typo, handle bzr-specific locking in GenericProcessor.
296
    def process(self, command_iter):
        """Import data into Bazaar by processing a stream of commands.

        A write lock is taken on the working tree if there is one,
        otherwise on the branch, otherwise on the repository. It is
        released again when processing ends, whether or not it succeeded.

        :param command_iter: an iterator providing commands
        """
        # Remember which object gets locked so that exactly that object
        # is unlocked afterwards.
        locked = None
        for candidate in (self.working_tree, self.branch, self.repo):
            if candidate is not None:
                locked = candidate
                break
        if locked is not None:
            locked.lock_write()
        try:
            # This invokes the base class implementation rather than
            # self._process; the write group cleanup is done below.
            super(GenericProcessor, self)._process(command_iter)
        finally:
            try:
                # If an unhandled exception occurred, abort the write group
                if self.repo is not None and self.repo.is_in_write_group():
                    self.repo.abort_write_group()
            finally:
                # Release the lock even if aborting the write group failed
                if locked is not None:
                    locked.unlock()
320
0.64.27 by Ian Clatworthy
1st cut at performance tuning
321
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: an iterator providing commands
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Catch everything (the exception is re-raised unchanged) so
            # that an open write group is never left behind.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise
329
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
330
    def post_process(self):
        """Finish the import once all commands have been processed.

        Commits the write group, saves the id-map, exports marks if the
        'export-marks' parameter is set, updates branches and working
        trees, packs the repository if autopacking was disabled earlier
        and reports statistics.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        export_marks_path = self.params.get("export-marks")
        if export_marks_path is not None:
            marks_file.export_marks(export_marks_path,
                self.cache_mgr.revision_ids)

        if self.cache_mgr.reftracker.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(
                self.cache_mgr.reftracker.heads),
            self.cache_mgr.reftracker.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for other branches, "
                "use 'bzr update' inside that branch.")
402
403
    def _update_working_trees(self, trees):
404
        if self.verbose:
405
            reporter = delta._ChangeReporter()
406
        else:
407
            reporter = None
408
        for wt in trees:
409
            self.note("Updating the working tree for %s ...", wt.basedir)
410
            wt.update(reporter)
411
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
412
0.64.167 by Ian Clatworthy
incremental packing for chk formats
413
    def _pack_repository(self, final=True):
0.64.162 by Ian Clatworthy
always repack the repository on completion
414
        # Before packing, free whatever memory we can and ensure
415
        # that groupcompress is configured to optimise disk space
416
        import gc
0.64.167 by Ian Clatworthy
incremental packing for chk formats
417
        if final:
418
            try:
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
419
                from bzrlib import groupcompress
0.64.167 by Ian Clatworthy
incremental packing for chk formats
420
            except ImportError:
421
                pass
422
            else:
423
                groupcompress._FAST = False
0.64.162 by Ian Clatworthy
always repack the repository on completion
424
        gc.collect()
425
        self.note("Packing repository ...")
426
        self.repo.pack()
427
428
        # To be conservative, packing puts the old packs and
429
        # indices in obsolete_packs. We err on the side of
430
        # optimism and clear out that directory to save space.
431
        self.note("Removing obsolete packs ...")
432
        # TODO: Use a public API for this once one exists
433
        repo_transport = self.repo._pack_collection.transport
434
        repo_transport.clone('obsolete_packs').delete_multi(
435
            repo_transport.list_dir('obsolete_packs'))
436
0.64.167 by Ian Clatworthy
incremental packing for chk formats
437
        # If we're not done, free whatever memory we can
438
        if not final:
439
            gc.collect()
440
0.64.41 by Ian Clatworthy
update multiple working trees if requested
441
    def _get_working_trees(self, branches):
442
        """Get the working trees for branches in the repository."""
443
        result = []
444
        wt_expected = self.repo.make_working_trees()
445
        for br in branches:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
446
            if br is None:
447
                continue
448
            elif br == self.branch:
449
                if self.working_tree:
450
                    result.append(self.working_tree)
0.64.41 by Ian Clatworthy
update multiple working trees if requested
451
            elif wt_expected:
452
                try:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
453
                    result.append(br.bzrdir.open_workingtree())
0.64.41 by Ian Clatworthy
update multiple working trees if requested
454
                except errors.NoWorkingTree:
455
                    self.warning("No working tree for branch %s", br)
456
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
457
458
    def dump_stats(self):
        """Report the number of revisions, branches and trees handled.

        The revision count reported is self._revision_count less
        self.skip_total; the elapsed time is measured from
        self._start_time.
        """
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
468
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
469
    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        The loaded map is stored in self.cache_mgr.revision_ids.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer revisions
            than the map has entries
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count < known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
483
484
    def _save_id_map(self):
        """Write the id-map held by the cache manager to self.id_map_path."""
        # The complete map is written on every call. Appending only the new
        # entries is a possible later change if this proves a problem.
        path = self.id_map_path
        revision_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(path, revision_ids)
489
0.64.5 by Ian Clatworthy
first cut at generic processing method
490
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob's data is stored in the cache manager. It is keyed by the
        command's id when the blob carries a mark, and by a digest of the
        data otherwise.
        """
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # cmd.data is expected to be one string, so hash it in a single
            # call. sha_strings() would iterate over it one character at a
            # time, producing the same digest far more slowly.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
497
0.64.170 by Ian Clatworthy
add autopack option to fast-import
498
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the command being processed, or None when the
            checkpoint was triggered automatically
        """
        # Close off the current write group and save the id-map; a fresh
        # write group is opened again at the end.
        self.repo.commit_write_group()
        self._save_id_map()
        automatic = cmd is None
        if automatic:
            # Only automatic checkpoints count towards auto-packing.
            self.checkpoint_count += 1
            if not self.checkpoint_count % self.autopack_every:
                self._pack_repository(final=False)
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
509
510
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than self.skip_total commits have been seen, the commit
        is not imported. Its heads and (for refs/tags/ refs) its tag are
        recorded, after checking that its id is present in the id-map.

        Otherwise the commit is processed by a handler built with
        self.commit_handler_factory, the resulting revision-id is recorded,
        and progress is reported. Finally the method either flags the import
        as finished (self.max_commits reached) or triggers an automatic
        checkpoint every self.checkpoint_every revisions.

        :raises plugin_errors.BadRestart: if a commit that is being skipped
            is missing from the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.reftracker.track_heads(cmd)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            if cmd.ref.startswith('refs/tags/'):
                tag_name = cmd.ref[len('refs/tags/'):]
                self._set_tag(tag_name, cmd.id)
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # Called only for its effect on the ref tracker; the value it
            # returns is not needed here.
            self.cache_mgr.reftracker.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose,
            prune_empty_dirs=self.prune_empty_dirs)
        try:
            handler.process()
        except:
            # Deliberately catches everything: the exception is re-raised,
            # this only makes sure the triggering commit is reported first.
            print("ABORT: exception occurred processing commit %s"
                % (cmd.id,))
            raise
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            self._set_tag(tag_name, cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
553
0.64.25 by Ian Clatworthy
slightly better progress reporting
554
    def report_progress(self, details=''):
        """Emit a progress note every self.progress_every revisions.

        The note gives the number of commits processed (out of
        self.total_commits when that is known) and the import rate in
        revisions per minute since self._start_time.

        :param details: extra text appended to the end of the message
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        if minutes > 0:
            revisions_added = self._revision_count - self.skip_total
            rate = revisions_added * 1.0 / minutes
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # No measurable time has elapsed (coarse clock, or the clock was
            # set back), so no meaningful rate exists. Omit it instead of
            # failing with ZeroDivisionError or printing a negative rate.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
568
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
569
    def progress_handler(self, cmd):
        """Process a ProgressCommand.

        Progress messages embedded in a stream are mostly noise, so they
        are only passed on to self.note() when running in verbose mode.
        """
        if not self.verbose:
            return
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
575
576
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a refs/tags/ ref defines a tag when it has a from
        clause, and is otherwise ignored (with a warning in verbose mode).
        A reset of any other ref with a from clause has its head recorded
        by the ref tracker.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            if cmd.from_ is not None:
                tracker = self.cache_mgr.reftracker
                tracker.track_heads_for_ref(cmd.ref, cmd.from_)
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if cmd.from_ is not None:
            self._set_tag(tag_name, cmd.from_)
            return
        if self.verbose:
            # Without a from clause there is nothing to point the tag at.
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
589
590
    def tag_handler(self, cmd):
        """Process a TagCommand.

        A tag without a from clause cannot be resolved to a revision, so it
        is ignored with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
596
597
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
598
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
599
        bzr_tag_name = name.decode('utf-8', 'replace')
600
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
601
        self.tags[bzr_tag_name] = bzr_rev_id
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
602
603
    def feature_handler(self, cmd):
        """Process a FeatureCommand.

        :raises plugin_errors.UnknownFeature: if the feature named by the
            command is not listed in commands.FEATURE_NAMES
        """
        requested = cmd.feature_name
        if requested in commands.FEATURE_NAMES:
            return
        raise plugin_errors.UnknownFeature(requested)
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
608
609
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        :param msg: the message (format string) to output
        :param args: values passed on to mutter() along with the message
        """
        # Inside the method body, 'debug' resolves to the module-level name
        # (the imported debug module), not to this method.
        if "fast-import" in debug.debug_flags:
            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
            mutter(msg, *args)
614
615
    def note(self, msg, *args):
        """Output a note prefixed with the current time of day.

        :param msg: the message (format string) to output
        :param args: values passed on to the module-level note() unchanged
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        # 'note' here is the module-level function, not this method.
        note(stamped, *args)
619
620
    def warning(self, msg, *args):
        """Output a warning prefixed with the current time of day.

        :param msg: the message (format string) to output
        :param args: values passed on to the module-level warning() unchanged
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        # 'warning' here is the module-level function, not this method.
        warning(stamped, *args)