/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Import processor that supports all Bazaar repository formats."""

import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
22
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.51 by Ian Clatworthy
disable autopacking
28
from bzrlib.repofmt import pack_repo
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
29
from bzrlib.trace import note, mutter
0.118.1 by Jelmer Vernooij
Cope with Debian's bzr using the system configobj, not shipping with its own.
30
try:
31
    import bzrlib.util.configobj.configobj as configobj
32
except ImportError:
33
    import configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
34
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
35
    branch_updater,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
36
    bzr_commit_handler,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
37
    cache_manager,
0.102.13 by Ian Clatworthy
Fix feature checking
38
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
39
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
40
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
41
    idmapfile,
0.78.5 by Ian Clatworthy
move import/export of marks into a module
42
    marks_file,
0.64.5 by Ian Clatworthy
first cut at generic processing method
43
    processor,
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
44
    revision_store,
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
46
47
0.64.41 by Ian Clatworthy
update multiple working trees if requested
48
# How many commits before automatically reporting progress
49
_DEFAULT_AUTO_PROGRESS = 1000
50
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
51
# How many commits before automatically checkpointing
52
_DEFAULT_AUTO_CHECKPOINT = 10000
53
0.64.170 by Ian Clatworthy
add autopack option to fast-import
54
# How many checkpoints before automatically packing
55
_DEFAULT_AUTO_PACK = 4
56
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
57
# How many inventories to cache
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
58
_DEFAULT_INV_CACHE_SIZE = 1
59
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
0.64.41 by Ian Clatworthy
update multiple working trees if requested
61
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
62
class GenericProcessor(processor.ImportProcessor):
63
    """An import processor that handles basic imports.
64
65
    Current features supported:
66
0.64.16 by Ian Clatworthy
safe processing tweaks
67
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
68
    * files and symlinks commits are supported
69
    * checkpoints automatically happen at a configurable frequency
70
      over and above the stream requested checkpoints
71
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
72
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
73
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
74
    At checkpoints and on completion, the commit-id -> revision-id map is
75
    saved to a file called 'fastimport-id-map'. If the import crashes
76
    or is interrupted, it can be started again and this file will be
77
    used to skip over already loaded revisions. The format of each line
78
    is "commit-id revision-id" so commit-ids cannot include spaces.
79
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
80
    Here are the supported parameters:
81
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
82
    * info - name of a hints file holding the analysis generated
83
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
84
      importing large repositories, this parameter is needed so
85
      that the importer knows what blobs to intelligently cache.
86
0.64.41 by Ian Clatworthy
update multiple working trees if requested
87
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
88
      By default, the importer updates the repository
89
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
91
0.64.170 by Ian Clatworthy
add autopack option to fast-import
92
    * count - only import this many commits then exit. If not set
93
      or negative, all commits are imported.
94
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.170 by Ian Clatworthy
add autopack option to fast-import
99
    * autopack - pack every n checkpoints. The default is 4.
100
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
101
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
102
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
103
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
104
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
105
106
    * import-marks - name of file to read to load mark information from
107
108
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
109
    """
110
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
111
    known_params = [
112
        'info',
113
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
114
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
115
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
116
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
117
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
118
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
119
        'import-marks',
120
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
121
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
122
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
123
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
0.64.196 by Ian Clatworthy
get tests passing again
124
            prune_empty_dirs=True):
125
        processor.ImportProcessor.__init__(self, bzrdir, params, verbose)
126
        self.prune_empty_dirs = prune_empty_dirs
127
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
128
    def pre_process(self):
0.64.26 by Ian Clatworthy
more progress reporting tweaks
129
        self._start_time = time.time()
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
130
        self._load_info_and_params()
0.102.18 by Ian Clatworthy
Tweak some diagnostic messages
131
        if self.total_commits:
132
            self.note("Starting import of %d commits ..." %
133
                (self.total_commits,))
134
        else:
135
            self.note("Starting import ...")
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
136
        self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
137
            self.inventory_cache_size)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
138
        
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
139
        if self.params.get("import-marks") is not None:
0.79.2 by Ian Clatworthy
extend & use marks_file API
140
            mark_info = marks_file.import_marks(self.params.get("import-marks"))
141
            if mark_info is not None:
142
                self.cache_mgr.revision_ids = mark_info[0]
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
143
            self.skip_total = False
144
            self.first_incremental_commit = True
145
        else:
146
            self.first_incremental_commit = False
147
            self.skip_total = self._init_id_map()
148
            if self.skip_total:
149
                self.note("Found %d commits already loaded - "
150
                    "skipping over these ...", self.skip_total)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
151
        self._revision_count = 0
152
153
        # mapping of tag name to revision_id
154
        self.tags = {}
155
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
156
        # Create the revision store to use for committing, if any
157
        self.rev_store = self._revision_store_factory()
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
158
0.64.51 by Ian Clatworthy
disable autopacking
159
        # Disable autopacking if the repo format supports it.
160
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
161
        if isinstance(self.repo, pack_repo.KnitPackRepository):
162
            self._original_max_pack_count = \
163
                self.repo._pack_collection._max_pack_count
164
            def _max_pack_count_for_import(total_revisions):
165
                return total_revisions + 1
166
            self.repo._pack_collection._max_pack_count = \
167
                _max_pack_count_for_import
168
        else:
169
            self._original_max_pack_count = None
0.64.144 by Ian Clatworthy
make groupcompress _FAST during import
170
 
171
        # Make groupcompress use the fast algorithm during importing.
172
        # We want to repack at the end anyhow when more information
173
        # is available to do a better job of saving space.
174
        try:
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
175
            from bzrlib import groupcompress
0.64.144 by Ian Clatworthy
make groupcompress _FAST during import
176
            groupcompress._FAST = True
177
        except ImportError:
178
            pass
179
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
180
        # Create a write group. This is committed at the end of the import.
181
        # Checkpointing closes the current one and starts a new one.
182
        self.repo.start_write_group()
183
184
    def _load_info_and_params(self):
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
185
        self._mode = bool(self.params.get('mode', 'default'))
186
        self._experimental = self._mode == 'experimental'
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
187
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
188
        # This is currently hard-coded but might be configurable via
189
        # parameters one day if that's needed
190
        repo_transport = self.repo.control_files._transport
191
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")
192
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
193
        # Load the info file, if any
194
        info_path = self.params.get('info')
195
        if info_path is not None:
196
            self.info = configobj.ConfigObj(info_path)
197
        else:
198
            self.info = None
199
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
200
        # Decide which CommitHandler to use
0.64.167 by Ian Clatworthy
incremental packing for chk formats
201
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
202
        if self.supports_chk and self._mode == 'classic':
203
            note("Cannot use classic algorithm on CHK repositories"
204
                 " - using default one instead")
205
            self._mode = 'default'
206
        if self._mode == 'classic':
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
207
            self.commit_handler_factory = \
208
                bzr_commit_handler.InventoryCommitHandler
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
209
        else:
210
            self.commit_handler_factory = \
211
                bzr_commit_handler.InventoryDeltaCommitHandler
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
212
0.64.41 by Ian Clatworthy
update multiple working trees if requested
213
        # Decide how often to automatically report progress
214
        # (not a parameter yet)
215
        self.progress_every = _DEFAULT_AUTO_PROGRESS
216
        if self.verbose:
217
            self.progress_every = self.progress_every / 10
218
0.64.170 by Ian Clatworthy
add autopack option to fast-import
219
        # Decide how often (# of commits) to automatically checkpoint
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
220
        self.checkpoint_every = int(self.params.get('checkpoint',
221
            _DEFAULT_AUTO_CHECKPOINT))
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
222
0.64.170 by Ian Clatworthy
add autopack option to fast-import
223
        # Decide how often (# of checkpoints) to automatically pack
224
        self.checkpoint_count = 0
225
        self.autopack_every = int(self.params.get('autopack',
226
            _DEFAULT_AUTO_PACK))
227
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
228
        # Decide how big to make the inventory cache
0.64.149 by Ian Clatworthy
larger default inventory cache for chk formats
229
        cache_size = int(self.params.get('inv-cache', -1))
230
        if cache_size == -1:
0.64.167 by Ian Clatworthy
incremental packing for chk formats
231
            if self.supports_chk:
0.64.149 by Ian Clatworthy
larger default inventory cache for chk formats
232
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
233
            else:
234
                cache_size = _DEFAULT_INV_CACHE_SIZE
235
        self.inventory_cache_size = cache_size
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
236
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
237
        # Find the maximum number of commits to import (None means all)
238
        # and prepare progress reporting. Just in case the info file
239
        # has an outdated count of commits, we store the max counts
240
        # at which we need to terminate separately to the total used
241
        # for progress tracking.
242
        try:
243
            self.max_commits = int(self.params['count'])
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
244
            if self.max_commits < 0:
245
                self.max_commits = None
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
246
        except KeyError:
247
            self.max_commits = None
0.64.25 by Ian Clatworthy
slightly better progress reporting
248
        if self.info is not None:
249
            self.total_commits = int(self.info['Command counts']['commit'])
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
250
            if (self.max_commits is not None and
251
                self.total_commits > self.max_commits):
252
                self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
253
        else:
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
254
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
255
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
256
    def _revision_store_factory(self):
257
        """Make a RevisionStore based on what the repository supports."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
258
        new_repo_api = hasattr(self.repo, 'revisions')
259
        if new_repo_api:
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
260
            return revision_store.RevisionStore2(self.repo)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
261
        elif not self._experimental:
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
262
            return revision_store.RevisionStore1(self.repo)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
263
        else:
264
            def fulltext_when(count):
265
                total = self.total_commits
266
                if total is not None and count == total:
267
                    fulltext = True
268
                else:
269
                    # Create an inventory fulltext every 200 revisions
270
                    fulltext = count % 200 == 0
271
                if fulltext:
272
                    self.note("%d commits - storing inventory as full-text",
273
                        count)
274
                return fulltext
275
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
276
            return revision_store.ImportRevisionStore1(
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
277
                self.repo, self.inventory_cache_size,
278
                fulltext_when=fulltext_when)
279
0.64.27 by Ian Clatworthy
1st cut at performance tuning
280
    def _process(self, command_iter):
281
        # if anything goes wrong, abort the write group if any
282
        try:
283
            processor.ImportProcessor._process(self, command_iter)
284
        except:
285
            if self.repo is not None and self.repo.is_in_write_group():
286
                self.repo.abort_write_group()
287
            raise
288
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
289
    def post_process(self):
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
290
        # Commit the current write group and checkpoint the id map
0.64.27 by Ian Clatworthy
1st cut at performance tuning
291
        self.repo.commit_write_group()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
292
        self._save_id_map()
0.64.27 by Ian Clatworthy
1st cut at performance tuning
293
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
294
        if self.params.get("export-marks") is not None:
0.78.5 by Ian Clatworthy
move import/export of marks into a module
295
            marks_file.export_marks(self.params.get("export-marks"),
296
                self.cache_mgr.revision_ids)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
297
0.97.1 by Gonéri Le Bouder
avoid STDERR crash
298
        if self.cache_mgr.last_ref == None:
299
            """Nothing to refresh"""
300
            return
301
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
302
        # Update the branches
303
        self.note("Updating branch information ...")
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
304
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
305
            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
0.64.64 by Ian Clatworthy
save tags known about in each branch
306
            self.cache_mgr.last_ref, self.tags)
0.64.34 by Ian Clatworthy
report lost branches
307
        branches_updated, branches_lost = updater.update()
308
        self._branch_count = len(branches_updated)
309
310
        # Tell the user about branches that were not created
311
        if branches_lost:
0.64.37 by Ian Clatworthy
create branches as required
312
            if not self.repo.is_shared():
313
                self.warning("Cannot import multiple branches into "
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
314
                    "a standalone branch")
0.64.37 by Ian Clatworthy
create branches as required
315
            self.warning("Not creating branches for these head revisions:")
0.64.34 by Ian Clatworthy
report lost branches
316
            for lost_info in branches_lost:
317
                head_revision = lost_info[1]
318
                branch_name = lost_info[0]
0.64.67 by James Westby
Add support for -Dfast-import.
319
                self.note("\t %s = %s", head_revision, branch_name)
0.64.34 by Ian Clatworthy
report lost branches
320
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
321
        # Update the working trees as requested
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
322
        self._tree_count = 0
0.64.34 by Ian Clatworthy
report lost branches
323
        remind_about_update = True
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
324
        if self._branch_count == 0:
325
            self.note("no branches to update")
326
            self.note("no working trees to update")
327
            remind_about_update = False
328
        elif self.params.get('trees', False):
0.64.41 by Ian Clatworthy
update multiple working trees if requested
329
            trees = self._get_working_trees(branches_updated)
330
            if trees:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
331
                self._update_working_trees(trees)
0.64.34 by Ian Clatworthy
report lost branches
332
                remind_about_update = False
0.64.41 by Ian Clatworthy
update multiple working trees if requested
333
            else:
334
                self.warning("No working trees available to update")
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
335
        else:
336
            # Update just the trunk. (This is always the first branch
337
            # returned by the branch updater.)
338
            trunk_branch = branches_updated[0]
339
            trees = self._get_working_trees([trunk_branch])
340
            if trees:
341
                self._update_working_trees(trees)
342
                remind_about_update = self._branch_count > 1
0.64.51 by Ian Clatworthy
disable autopacking
343
0.64.176 by Ian Clatworthy
faster export of revision range & improved diagnostics in fast-export
344
        # Dump the cache stats now because we clear it before the final pack
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
345
        if self.verbose:
346
            self.cache_mgr.dump_stats()
0.64.51 by Ian Clatworthy
disable autopacking
347
        if self._original_max_pack_count:
348
            # We earlier disabled autopacking, creating one pack every
0.64.75 by Ian Clatworthy
if checkpointed, pack repository and delete obsolete_packs
349
            # checkpoint instead. We now pack the repository to optimise
350
            # how data is stored.
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
351
            self.cache_mgr.clear_all()
0.64.162 by Ian Clatworthy
always repack the repository on completion
352
            self._pack_repository()
353
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
354
        # Finish up by dumping stats & telling the user what to do next.
355
        self.dump_stats()
0.64.34 by Ian Clatworthy
report lost branches
356
        if remind_about_update:
0.64.75 by Ian Clatworthy
if checkpointed, pack repository and delete obsolete_packs
357
            # This message is explicitly not timestamped.
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
358
            note("To refresh the working tree for other branches, "
359
                "use 'bzr update' inside that branch.")
360
361
    def _update_working_trees(self, trees):
362
        if self.verbose:
363
            reporter = delta._ChangeReporter()
364
        else:
365
            reporter = None
366
        for wt in trees:
367
            self.note("Updating the working tree for %s ...", wt.basedir)
368
            wt.update(reporter)
369
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
370
0.64.167 by Ian Clatworthy
incremental packing for chk formats
371
    def _pack_repository(self, final=True):
0.64.162 by Ian Clatworthy
always repack the repository on completion
372
        # Before packing, free whatever memory we can and ensure
373
        # that groupcompress is configured to optimise disk space
374
        import gc
0.64.167 by Ian Clatworthy
incremental packing for chk formats
375
        if final:
376
            try:
0.64.168 by Ian Clatworthy
blob reference counting, not just sticky vs otherwise
377
                from bzrlib import groupcompress
0.64.167 by Ian Clatworthy
incremental packing for chk formats
378
            except ImportError:
379
                pass
380
            else:
381
                groupcompress._FAST = False
0.64.162 by Ian Clatworthy
always repack the repository on completion
382
        gc.collect()
383
        self.note("Packing repository ...")
384
        self.repo.pack()
385
386
        # To be conservative, packing puts the old packs and
387
        # indices in obsolete_packs. We err on the side of
388
        # optimism and clear out that directory to save space.
389
        self.note("Removing obsolete packs ...")
390
        # TODO: Use a public API for this once one exists
391
        repo_transport = self.repo._pack_collection.transport
392
        repo_transport.clone('obsolete_packs').delete_multi(
393
            repo_transport.list_dir('obsolete_packs'))
394
0.64.167 by Ian Clatworthy
incremental packing for chk formats
395
        # If we're not done, free whatever memory we can
396
        if not final:
397
            gc.collect()
398
0.64.41 by Ian Clatworthy
update multiple working trees if requested
399
    def _get_working_trees(self, branches):
400
        """Get the working trees for branches in the repository."""
401
        result = []
402
        wt_expected = self.repo.make_working_trees()
403
        for br in branches:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
404
            if br is None:
405
                continue
406
            elif br == self.branch:
407
                if self.working_tree:
408
                    result.append(self.working_tree)
0.64.41 by Ian Clatworthy
update multiple working trees if requested
409
            elif wt_expected:
410
                try:
0.95.3 by Ian Clatworthy
Update the working tree for trunk implicitly
411
                    result.append(br.bzrdir.open_workingtree())
0.64.41 by Ian Clatworthy
update multiple working trees if requested
412
                except errors.NoWorkingTree:
413
                    self.warning("No working tree for branch %s", br)
414
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
415
416
    def dump_stats(self):
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
417
        time_required = progress.str_tdelta(time.time() - self._start_time)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
418
        rc = self._revision_count - self.skip_total
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
419
        bc = self._branch_count
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
420
        wtc = self._tree_count
421
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
0.64.32 by Ian Clatworthy
move single_plural into helpers
422
            rc, helpers.single_plural(rc, "revision", "revisions"),
423
            bc, helpers.single_plural(bc, "branch", "branches"),
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
424
            wtc, helpers.single_plural(wtc, "tree", "trees"),
425
            time_required)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
426
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
427
    def _init_id_map(self):
        """Load the id-map and sanity-check it against the repository.

        Only the entry counts are compared for now; a stricter check
        could also verify the revision-ids themselves are identical.

        :return: the number of entries in the map
        """
        revision_ids, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = revision_ids
        in_repo = len(self.repo.all_revision_ids())
        # The map may legitimately list more revisions than the
        # repository holds, but never fewer.
        if in_repo < known:
            raise plugin_errors.BadRepositorySize(known, in_repo)
        return known
441
442
    def _save_id_map(self):
        """Persist the id-map to disk."""
        # Rewriting the whole map every time is simple and has proved
        # cheap enough; switch to appending only the new entries if it
        # ever becomes a bottleneck.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)
447
0.64.5 by Ian Clatworthy
first cut at generic processing method
448
    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Marked blobs are cached under their mark id; unmarked blobs
        # get a content digest so later commands can still refer to them.
        if cmd.mark is None:
            key = osutils.sha_strings(cmd.data)
        else:
            key = cmd.id
        self.cache_mgr.store_blob(key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
455
0.64.170 by Ian Clatworthy
add autopack option to fast-import
456
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Flush pending work: close the active write group and save the
        # id-map so an interrupted import can be restarted from here.
        self.repo.commit_write_group()
        self._save_id_map()
        # A None cmd marks an automatic (implicit) checkpoint; only
        # those are counted when deciding whether to auto-pack.
        if cmd is None:
            self.checkpoint_count += 1
            if self.checkpoint_count % self.autopack_every == 0:
                self._pack_repository(final=False)
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
467
468
    def commit_handler(self, cmd):
469
        """Process a CommitCommand."""
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
470
        if self.skip_total and self._revision_count < self.skip_total:
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
471
            self.cache_mgr.track_heads(cmd)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
472
            # Check that we really do know about this commit-id
473
            if not self.cache_mgr.revision_ids.has_key(cmd.id):
474
                raise plugin_errors.BadRestart(cmd.id)
475
            # Consume the file commands and free any non-sticky blobs
476
            for fc in cmd.file_iter():
477
                pass
478
            self.cache_mgr._blobs = {}
479
            self._revision_count += 1
0.111.2 by Max Bowsher
Also catch tagging via commit when resuming a crashed import.
480
            if cmd.ref.startswith('refs/tags/'):
481
                tag_name = cmd.ref[len('refs/tags/'):]
482
                self._set_tag(tag_name, cmd.id)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
483
            return
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
484
        if self.first_incremental_commit:
485
            self.first_incremental_commit = None
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
486
            parents = self.cache_mgr.track_heads(cmd)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
487
488
        # 'Commit' the revision and report progress
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
489
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
0.64.196 by Ian Clatworthy
get tests passing again
490
            self.rev_store, verbose=self.verbose,
491
            prune_empty_dirs=self.prune_empty_dirs)
0.64.180 by Ian Clatworthy
report triggering commit when exception occurs
492
        try:
493
            handler.process()
494
        except:
495
            print "ABORT: exception occurred processing commit %s" % (cmd.id)
496
            raise
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
497
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
0.64.27 by Ian Clatworthy
1st cut at performance tuning
498
        self._revision_count += 1
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
499
        self.report_progress("(%s)" % cmd.id)
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
500
0.111.1 by Max Bowsher
Set a tag when touching a refs/tags/ ref with a commit command.
501
        if cmd.ref.startswith('refs/tags/'):
502
            tag_name = cmd.ref[len('refs/tags/'):]
503
            self._set_tag(tag_name, cmd.id)
504
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
505
        # Check if we should finish up or automatically checkpoint
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
506
        if (self.max_commits is not None and
507
            self._revision_count >= self.max_commits):
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
508
            self.note("Stopping after reaching requested count of commits")
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
509
            self.finished = True
510
        elif self._revision_count % self.checkpoint_every == 0:
511
            self.note("%d commits - automatic checkpoint triggered",
512
                self._revision_count)
0.64.170 by Ian Clatworthy
add autopack option to fast-import
513
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
514
0.64.25 by Ian Clatworthy
slightly better progress reporting
515
    def report_progress(self, details=''):
        """Emit a progress note once every progress_every commits."""
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is None:
            counts = "%d" % (self._revision_count,)
        else:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        minutes = (time.time() - self._start_time) / 60
        imported = self._revision_count - self.skip_total
        rate = imported * 1.0 / minutes
        # Drop the decimal place once the rate is comfortably high
        if rate > 10:
            rate_str = "at %.0f/minute " % rate
        else:
            rate_str = "at %.1f/minute " % rate
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
529
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
530
    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # Streams often embed noisy progress messages; only surface
        # them when the user asked for verbose output.
        if not self.verbose:
            return
        self.note("progress %s" % (cmd.message,))
0.64.5 by Ian Clatworthy
first cut at generic processing method
536
537
    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        ref = cmd.ref
        if ref.startswith('refs/tags/'):
            # A reset on a tag ref defines a lightweight tag - but only
            # if it says what commit the tag should point at.
            tag_name = ref[len('refs/tags/'):]
            if cmd.from_ is None:
                if self.verbose:
                    self.warning("ignoring reset refs/tags/%s - no from clause"
                        % tag_name)
            else:
                self._set_tag(tag_name, cmd.from_)
            return

        # For branch refs, record the head so branches get created later
        if cmd.from_ is not None:
            self.cache_mgr.track_heads_for_ref(ref, cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
550
551
    def tag_handler(self, cmd):
        """Process a TagCommand."""
        # A tag without a from clause has nothing to point at
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
        else:
            self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
557
558
    def _set_tag(self, name, from_):
        """Define a tag given a name and import 'from' reference."""
        # Tag names arrive as raw bytes; replace undecodable characters
        # rather than aborting the import over a bad name.
        tag_name = name.decode('utf-8', 'replace')
        self.tags[tag_name] = self.cache_mgr.revision_ids[from_]
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
563
564
    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        name = cmd.feature_name
        # Refuse streams that use features we don't understand rather
        # than importing them incorrectly.
        if name not in commands.FEATURE_NAMES:
            raise plugin_errors.UnknownFeature(name)