/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
15
16
"""Import processor that supports all Bazaar repository formats."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
19
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
20
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
22
from .... import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
23
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
24
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
25
    errors,
26
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
27
    progress,
6846.3.1 by Jelmer Vernooij
Support '0' marker in fastimport plugin.
28
    revision as _mod_revision,
0.64.5 by Ian Clatworthy
first cut at generic processing method
29
    )
6670.4.5 by Jelmer Vernooij
Move breezy.repofmt contents to breezy.bzr.
30
from ....bzr.knitpack_repo import KnitPackRepository
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
31
from ....trace import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
32
    mutter,
33
    note,
34
    warning,
35
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
36
import configobj
37
from .. import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
38
    branch_updater,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
39
    cache_manager,
0.139.1 by Jelmer Vernooij
Import helper functions that have been removed from python-fastimport.
40
    helpers,
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
41
    idmapfile,
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
42
    marks_file,
43
    revision_store,
44
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
45
from fastimport import (
0.102.13 by Ian Clatworthy
Fix feature checking
46
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
47
    errors as plugin_errors,
0.64.5 by Ian Clatworthy
first cut at generic processing method
48
    processor,
49
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
50
51
0.64.41 by Ian Clatworthy
update multiple working trees if requested
52
# How many commits before automatically reporting progress
53
_DEFAULT_AUTO_PROGRESS = 1000
54
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
55
# How many commits before automatically checkpointing
56
_DEFAULT_AUTO_CHECKPOINT = 10000
57
0.64.170 by Ian Clatworthy
add autopack option to fast-import
58
# How many checkpoints before automatically packing
59
_DEFAULT_AUTO_PACK = 4
60
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
61
# How many inventories to cache
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
62
_DEFAULT_INV_CACHE_SIZE = 1
63
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
64
0.64.41 by Ian Clatworthy
update multiple working trees if requested
65
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
66
class GenericProcessor(processor.ImportProcessor):
67
    """An import processor that handles basic imports.
68
69
    Current features supported:
70
0.64.16 by Ian Clatworthy
safe processing tweaks
71
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
72
    * files and symlinks commits are supported
73
    * checkpoints automatically happen at a configurable frequency
74
      over and above the stream requested checkpoints
75
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
76
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
77
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
78
    At checkpoints and on completion, the commit-id -> revision-id map is
79
    saved to a file called 'fastimport-id-map'. If the import crashes
80
    or is interrupted, it can be started again and this file will be
81
    used to skip over already loaded revisions. The format of each line
82
    is "commit-id revision-id" so commit-ids cannot include spaces.
83
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
84
    Here are the supported parameters:
85
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
86
    * info - name of a hints file holding the analysis generated
87
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
88
      importing large repositories, this parameter is needed so
89
      that the importer knows what blobs to intelligently cache.
90
0.64.41 by Ian Clatworthy
update multiple working trees if requested
91
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
92
      By default, the importer updates the repository
93
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
94
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
95
0.64.170 by Ian Clatworthy
add autopack option to fast-import
96
    * count - only import this many commits then exit. If not set
97
      or negative, all commits are imported.
98
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
99
    * checkpoint - automatically checkpoint every n commits over and
100
      above any checkpoints contained in the import stream.
101
      The default is 10000.
102
0.64.170 by Ian Clatworthy
add autopack option to fast-import
103
    * autopack - pack every n checkpoints. The default is 4.
104
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
105
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
106
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
107
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
108
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
109
110
    * import-marks - name of file to read to load mark information from
111
112
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
113
    """
114
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
115
    known_params = [
116
        'info',
117
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
118
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
119
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
120
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
121
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
122
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
123
        'import-marks',
124
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
125
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
126
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
127
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
            prune_empty_dirs=True):
        """Bind the processor to the control directory being imported into.

        :param bzrdir: control directory to import into; it is queried
            for a working tree/branch pair and, failing that, for a
            repository
        :param params: passed through to processor.ImportProcessor
        :param verbose: passed through to processor.ImportProcessor
        :param outf: accepted but not used by this constructor
        :param prune_empty_dirs: stored as ``self.prune_empty_dirs``
        """
        processor.ImportProcessor.__init__(self, params, verbose)
        self.prune_empty_dirs = prune_empty_dirs
        self.controldir = bzrdir
        try:
            # Might be inside a branch
            tree, branch = bzrdir._get_tree_branch()
            repo = branch.repository
        except errors.NotBranchError:
            # Must be inside a repository
            tree, branch = None, None
            repo = bzrdir.open_repository()
        self.working_tree = tree
        self.branch = branch
        self.repo = repo
0.64.196 by Ian Clatworthy
get tests passing again
141
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
142
    def pre_process(self):
        """Set up all state needed before the first command is handled.

        Loads the parameters and info file, creates the cache manager,
        loads either the user-supplied marks file or the id-map left by
        an earlier run, creates the revision store, adjusts repository
        packing behaviour for the duration of the import and finally
        opens the write group that post_process() commits.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        if self.total_commits:
            self.note("Starting import of %d commits ..." %
                (self.total_commits,))
        else:
            self.note("Starting import ...")
        self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: fall back on the id-map to find out how
            # many commits were already loaded.
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            mark_info = marks_file.import_marks(marks_path)
            if mark_info is not None:
                self.cache_mgr.marks = mark_info
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                # Always one more than the revision total.
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from .... import groupcompress
            groupcompress._FAST = True
        except ImportError:
            pass

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
197
198
    def _load_info_and_params(self):
        """Derive the processor's settings from ``self.params`` and the info file.

        Sets the import mode, the id-map path, the parsed info file (or
        None), the commit handler factory, the progress/checkpoint/
        autopack intervals, the inventory cache size and the commit
        limits (``max_commits`` and ``total_commits``).
        """
        from .. import bzr_commit_handler
        # Keep the mode as the string that was supplied. Wrapping it in
        # bool() collapses every mode to True, so the comparisons against
        # 'experimental' and 'classic' below could never succeed.
        self._mode = self.params.get('mode', 'default')
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer on Python 3.
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache; -1 means "not set"
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
270
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
271
    def _revision_store_factory(self):
        """Pick and build the RevisionStore matching the repository.

        :return: a RevisionStore2 when the repository has a ``revisions``
            attribute; otherwise a RevisionStore1, or an
            ImportRevisionStore1 when experimental mode is enabled
        """
        if hasattr(self.repo, 'revisions'):
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def fulltext_when(count):
            # Store a full-text inventory for the last expected commit
            # and otherwise every 200 revisions.
            total = self.total_commits
            is_last = total is not None and count == total
            fulltext = is_last or count % 200 == 0
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=fulltext_when)
294
0.123.5 by Jelmer Vernooij
Fix typo, handle bzr-specific locking in GenericProcessor.
295
    def process(self, command_iter):
        """Import data into Bazaar by processing a stream of commands.

        A write lock is taken on the working tree if there is one, else
        on the branch, else on the repository, and released again once
        processing ends, whether or not it succeeded.

        :param command_iter: an iterator providing commands
        """
        def _lock_holder():
            # First available of: working tree, branch, repository.
            for candidate in (self.working_tree, self.branch, self.repo):
                if candidate is not None:
                    return candidate
            return None

        holder = _lock_holder()
        if holder is not None:
            holder.lock_write()
        try:
            super(GenericProcessor, self)._process(command_iter)
        finally:
            # If an unhandled exception occurred, abort the write group
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            # Release the locks
            holder = _lock_holder()
            if holder is not None:
                holder.unlock()
319
0.64.27 by Ian Clatworthy
1st cut at performance tuning
320
    def _process(self, command_iter):
        """Run the base class command loop, cleaning up on any failure.

        :param command_iter: an iterator providing commands
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: whatever went wrong, abort the open
            # write group (if any) and let the error propagate.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
328
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
329
    def post_process(self):
        """Finish the import once every command has been handled.

        Commits the write group, saves the id-map and (if requested)
        the marks file. If no ref was seen during the import, returns
        at that point. Otherwise updates the branches, reports branches
        that could not be created, updates working trees, repacks the
        repository when autopacking was disabled earlier, and dumps the
        final statistics.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            marks_file.export_marks(self.params.get("export-marks"),
                self.cache_mgr.marks)

        if self.cache_mgr.reftracker.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(
                self.cache_mgr.reftracker.heads),
            self.cache_mgr.reftracker.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                # Other branches still need a manual update.
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for other branches, "
                "use 'bzr update' inside that branch.")
401
402
    def _update_working_trees(self, trees):
        """Update each given working tree, counting the ones updated.

        :param trees: the working trees to update
        """
        # Only report individual changes when running verbosely.
        reporter = delta._ChangeReporter() if self.verbose else None
        for tree in trees:
            self.note("Updating the working tree for %s ...", tree.basedir)
            tree.update(reporter)
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
411
0.64.167 by Ian Clatworthy
incremental packing for chk formats
412
    def _pack_repository(self, final=True):
        """Pack the repository and delete the packs made obsolete by it.

        :param final: True when this is the pack at the end of the
            import; groupcompress's fast mode is then switched off
            before packing
        """
        # Before packing, free whatever memory we can and ensure
        # that groupcompress is configured to optimise disk space
        import gc
        if final:
            try:
                from .... import groupcompress
            except ImportError:
                groupcompress = None
            if groupcompress is not None:
                groupcompress._FAST = False
        gc.collect()
        self.note("Packing repository ...")
        self.repo.pack()

        # To be conservative, packing puts the old packs and
        # indices in obsolete_packs. We err on the side of
        # optimism and clear out that directory to save space.
        self.note("Removing obsolete packs ...")
        # TODO: Use a public API for this once one exists
        pack_transport = self.repo._pack_collection.transport
        obsolete_transport = pack_transport.clone('obsolete_packs')
        for entry in obsolete_transport.list_dir('.'):
            obsolete_transport.delete(entry)

        # If we're not done, free whatever memory we can
        if not final:
            gc.collect()
440
0.64.41 by Ian Clatworthy
update multiple working trees if requested
441
    def _get_working_trees(self, branches):
        """Collect the working trees belonging to the given branches.

        :param branches: branches to look up; None entries are ignored
        :return: list of the working trees that could be found
        """
        found = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br is None:
                continue
            if br == self.branch:
                # Reuse the tree opened alongside our own branch, if any.
                if self.working_tree:
                    found.append(self.working_tree)
                continue
            if not wt_expected:
                continue
            try:
                found.append(br.controldir.open_workingtree())
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", br)
        return found
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
457
458
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision count excludes the commits that were skipped, and
        the message includes the time elapsed since pre_process().
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
468
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
469
    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the map knows about more entries
            than the repository has revisions
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        marks, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.marks = marks
        # Register the '0' mark for the null revision; count it when
        # add_mark() reports a true value.
        if self.cache_mgr.add_mark('0', _mod_revision.NULL_REVISION):
            known += 1

        existing_count = len(self.repo.all_revision_ids())
        if known > existing_count:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
486
487
    def _save_id_map(self):
        """Write the cache manager's marks to ``self.id_map_path``."""
        # The complete map is rewritten on every call. Should that become a
        # bottleneck, switching to 'append just the new ones' is an option.
        target_path = self.id_map_path
        current_marks = self.cache_mgr.marks
        idmapfile.save_id_map(target_path, current_marks)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
492
0.64.5 by Ian Clatworthy
first cut at generic processing method
493
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob's data is stored in the cache manager. A marked blob is
        keyed by the command's id; an unmarked blob is keyed by the SHA
        digest of its content.

        :param cmd: the blob command; ``mark``, ``id`` and ``data`` are read
        """
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # cmd.data is one string, not a sequence of strings, so hash it
            # in a single call. sha_strings() would walk it element by
            # element, which is wasteful and fails on Python 3 bytes (whose
            # elements are ints). The resulting digest is the same.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
500
0.64.170 by Ian Clatworthy
add autopack option to fast-import
501
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        Commits the repository's current write group, saves the id-map and
        opens a fresh write group. Automatic checkpoints are signalled by
        ``cmd`` being None: only those are counted, and every
        ``autopack_every``-th one also packs the repository.

        :param cmd: the checkpoint command, or None for an automatic
            checkpoint
        """
        # Close off the work done so far and persist the marks.
        self.repo.commit_write_group()
        self._save_id_map()

        triggered_automatically = cmd is None
        if triggered_automatically:
            # Explicit checkpoints in the stream do not count towards packing.
            self.checkpoint_count = self.checkpoint_count + 1
            remainder = self.checkpoint_count % self.autopack_every
            if remainder == 0:
                self._pack_repository(final=False)

        # self.repo is looked up again rather than cached in a local, so
        # that whatever it refers to after packing is what gets used.
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
512
513
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While resuming (``skip_total`` is set and fewer than ``skip_total``
        commits have been seen) the commit is not imported again: its heads
        are tracked, its mark must already be present in the id-map, the
        blob cache is emptied and the commit is counted.

        Otherwise the commit is handed to a handler built by
        ``commit_handler_factory``, its mark is recorded against the
        resulting revision id, progress is reported, and the import is either
        flagged as finished (``max_commits`` reached) or automatically
        checkpointed every ``checkpoint_every`` commits.

        In both cases a commit on a ``refs/tags/`` ref also defines a tag.

        :param cmd: the commit command; ``id`` and ``ref`` are read here
        :raises BadRestart: if a commit skipped during a resume has a mark
            that is missing from the id-map
        """
        mark = cmd.id.lstrip(':')
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.reftracker.track_heads(cmd)
            # Check that we really do know about this commit-id.
            # ('in' rather than has_key(): the latter is gone on Python 3.)
            if mark not in self.cache_mgr.marks:
                raise plugin_errors.BadRestart(mark)
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            if cmd.ref.startswith('refs/tags/'):
                tag_name = cmd.ref[len('refs/tags/'):]
                self._set_tag(tag_name, cmd.id)
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # Called for its effect on the ref tracker only; the returned
            # value was never used.
            self.cache_mgr.reftracker.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose,
            prune_empty_dirs=self.prune_empty_dirs)
        try:
            handler.process()
        except BaseException:
            # Deliberately catches everything (interrupts included): the
            # only purpose is to name the offending commit before the
            # exception carries on upwards unchanged.
            print("ABORT: exception occurred processing commit %s" % (cmd.id))
            raise
        self.cache_mgr.add_mark(mark, handler.revision_id)
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            self._set_tag(tag_name, cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
                self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
557
0.64.25 by Ian Clatworthy
slightly better progress reporting
558
    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        The note shows how many commits have been processed (and out of how
        many, when ``total_commits`` is known), the import rate in commits
        per minute since ``_start_time``, and any extra ``details``. The rate
        only counts commits beyond ``skip_total``.

        :param details: text appended to the end of the progress note
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        revisions_added = self._revision_count - self.skip_total
        if minutes > 0:
            rate = revisions_added * 1.0 / minutes
        else:
            # No measurable time has elapsed (coarse clock, or a clock that
            # stepped backwards). Report a zero rate instead of dividing by
            # zero.
            rate = 0.0
        if rate > 10:
            rate_str = "at %.0f/minute " % rate
        else:
            rate_str = "at %.1f/minute " % rate
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
572
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
573
    def progress_handler(self, cmd):
        """Process a ProgressCommand.

        The command's message is relayed as a note, but only in verbose
        mode: most progress messages embedded in streams are just noise.

        :param cmd: the progress command; ``message`` is read
        """
        if not self.verbose:
            return
        self.note("progress %s" % (cmd.message,))
0.64.5 by Ian Clatworthy
first cut at generic processing method
579
580
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a tag from its 'from'
        reference; without a 'from' clause it is ignored, with a warning in
        verbose mode only. A reset of any other ref that has a 'from' clause
        updates the ref tracker's heads for that ref.

        :param cmd: the reset command; ``ref`` and ``from_`` are read
        """
        is_tag_ref = cmd.ref.startswith('refs/tags/')
        source = cmd.from_

        if not is_tag_ref:
            if source is not None:
                self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, source)
            return

        tag_name = cmd.ref[len('refs/tags/'):]
        if source is None:
            if self.verbose:
                self.warning("ignoring reset refs/tags/%s - no from clause"
                    % tag_name)
            return
        self._set_tag(tag_name, source)
0.64.5 by Ian Clatworthy
first cut at generic processing method
593
594
    def tag_handler(self, cmd):
        """Process a TagCommand.

        The tag is defined from the command's 'from' reference; a tag with
        no 'from' clause is ignored with a warning.

        :param cmd: the tag command; ``id`` and ``from_`` are read
        """
        source = cmd.from_
        if source is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, source)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
600
601
    def _set_tag(self, name, from_):
        """Define a tag given a name and import 'from' reference.

        :param name: the tag name, either as UTF-8 bytes (undecodable bytes
            are replaced) or as already-decoded text
        :param from_: the committish naming the tagged revision; it is
            resolved through the cache manager's ``lookup_committish``
        """
        if isinstance(name, bytes):
            bzr_tag_name = name.decode('utf-8', 'replace')
        else:
            # Already text. Callers slice the name out of a ref they have
            # just matched against a str literal, so on Python 3 it arrives
            # as str, which has no decode().
            bzr_tag_name = name
        bzr_rev_id = self.cache_mgr.lookup_committish(from_)
        self.tags[bzr_tag_name] = bzr_rev_id
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
606
607
    def feature_handler(self, cmd):
        """Process a FeatureCommand.

        Nothing is done for a feature listed in ``commands.FEATURE_NAMES``.

        :param cmd: the feature command; ``feature_name`` is read
        :raises UnknownFeature: if the named feature is not a known one
        """
        requested = cmd.feature_name
        if requested in commands.FEATURE_NAMES:
            return
        raise plugin_errors.UnknownFeature(requested)
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
612
0.64.297 by Jelmer Vernooij
Fix typo.
613
    def debug(self, msg, *args):
        """Output a debug message if the "fast-import" debug flag is set.

        The message is prefixed with the time of day and ``DEBUG:`` and then
        passed, together with ``args``, to ``mutter``.
        """
        if "fast-import" not in debug.debug_flags:
            return
        stamped = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(stamped, *args)
618
619
    def note(self, msg, *args):
        """Output a note, prefixed with the time of day.

        ``args`` are forwarded untouched to the module-level ``note``.
        """
        timestamp = self._time_of_day()
        note("%s %s" % (timestamp, msg), *args)
623
624
    def warning(self, msg, *args):
        """Output a warning, prefixed with the time of day and ``WARNING:``.

        ``args`` are forwarded untouched to the module-level ``warning``.
        """
        timestamp = self._time_of_day()
        warning("%s WARNING: %s" % (timestamp, msg), *args)