/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
22
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.51 by Ian Clatworthy
disable autopacking
28
from bzrlib.repofmt import pack_repo
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
29
from bzrlib.trace import note
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
30
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
31
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
32
    branch_updater,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
33
    bzr_commit_handler,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
34
    cache_manager,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
35
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
36
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
37
    idmapfile,
0.78.5 by Ian Clatworthy
move import/export of marks into a module
38
    marks_file,
0.64.5 by Ian Clatworthy
first cut at generic processing method
39
    processor,
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
40
    revision_store,
0.64.5 by Ian Clatworthy
first cut at generic processing method
41
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
42
43
0.64.41 by Ian Clatworthy
update multiple working trees if requested
44
# How many commits before automatically reporting progress
45
_DEFAULT_AUTO_PROGRESS = 1000
46
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
47
# How many commits before automatically checkpointing
48
_DEFAULT_AUTO_CHECKPOINT = 10000
49
0.64.170 by Ian Clatworthy
add autopack option to fast-import
50
# How many checkpoints before automatically packing
51
_DEFAULT_AUTO_PACK = 4
52
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
53
# How many inventories to cache
54
_DEFAULT_INV_CACHE_SIZE = 10
0.64.149 by Ian Clatworthy
larger default inventory cache for chk formats
55
_DEFAULT_CHK_INV_CACHE_SIZE = 100
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
56
0.64.41 by Ian Clatworthy
update multiple working trees if requested
57
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
58
class GenericProcessor(processor.ImportProcessor):
59
    """An import processor that handles basic imports.
60
61
    Current features supported:
62
0.64.16 by Ian Clatworthy
safe processing tweaks
63
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
64
    * files and symlinks commits are supported
65
    * checkpoints automatically happen at a configurable frequency
66
      over and above the stream requested checkpoints
67
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
68
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
69
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
70
    At checkpoints and on completion, the commit-id -> revision-id map is
71
    saved to a file called 'fastimport-id-map'. If the import crashes
72
    or is interrupted, it can be started again and this file will be
73
    used to skip over already loaded revisions. The format of each line
74
    is "commit-id revision-id" so commit-ids cannot include spaces.
75
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
    Here are the supported parameters:
77
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
78
    * info - name of a hints file holding the analysis generated
79
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
80
      importing large repositories, this parameter is needed so
81
      that the importer knows what blobs to intelligently cache.
82
0.64.41 by Ian Clatworthy
update multiple working trees if requested
83
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
84
      By default, the importer updates the repository
85
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
86
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
87
0.64.170 by Ian Clatworthy
add autopack option to fast-import
88
    * count - only import this many commits then exit. If not set
89
      or negative, all commits are imported.
90
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
91
    * checkpoint - automatically checkpoint every n commits over and
92
      above any checkpoints contained in the import stream.
93
      The default is 10000.
94
0.64.170 by Ian Clatworthy
add autopack option to fast-import
95
    * autopack - pack every n checkpoints. The default is 4.
96
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
97
    * inv-cache - number of inventories to cache.
0.64.149 by Ian Clatworthy
larger default inventory cache for chk formats
98
      If not set, the default is 100 for CHK formats and 10 otherwise.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
99
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
100
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
101
102
    * import-marks - name of file to read to load mark information from
103
104
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
105
    """
106
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
107
    known_params = [
108
        'info',
109
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
110
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
111
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
112
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
113
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
114
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
115
        'import-marks',
116
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
117
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
118
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
119
    def pre_process(self):
        """Prepare the processor state before any command is handled.

        Records the start time, loads parameters, builds the cache
        manager, seeds the commit-id -> revision-id map (from a marks
        file if 'import-marks' was given, otherwise from the id-map
        file), creates the revision store, tunes the repository for
        bulk loading and finally opens a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = cache_manager.CacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: fall back to the id-map left behind by an
            # earlier (possibly interrupted) run.
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            mark_info = marks_file.import_marks(marks_path)
            if mark_info is not None:
                self.cache_mgr.revision_ids = mark_info[0]
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            # Remember the original so later code can tell that
            # autopacking was switched off.
            self._original_max_pack_count = pack_collection._max_pack_count

            def _never_autopack(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _never_autopack

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from bzrlib import groupcompress
        except ImportError:
            pass
        else:
            groupcompress._FAST = True

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
169
170
    def _load_info_and_params(self):
        """Read self.params (and the optional info file) into attributes.

        Sets: _mode, _experimental, id_map_path, info, supports_chk,
        commit_handler_factory, progress_every, checkpoint_every,
        checkpoint_count, autopack_every, inventory_cache_size,
        max_commits and total_commits.
        """
        # Keep the mode as a string: it is compared against
        # 'experimental' and 'classic' below. (Wrapping it in bool()
        # made both comparisons always false.) A missing or None value
        # means 'default'.
        self._mode = self.params.get('mode') or 'default'
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps this an integer even under true division
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache; -1 means "not set"
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
241
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
242
    def _revision_store_factory(self):
        """Build the RevisionStore matching the repository's capabilities.

        Repositories exposing a 'revisions' attribute get RevisionStore2.
        Otherwise RevisionStore1 is used, unless experimental mode is on,
        in which case an ImportRevisionStore1 is returned.
        """
        if hasattr(self.repo, 'revisions'):
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def fulltext_when(count):
            # Store a full-text for the final expected commit, and
            # otherwise create an inventory fulltext every 200 revisions
            expected_total = self.total_commits
            is_last = expected_total is not None and count == expected_total
            store_fulltext = is_last or count % 200 == 0
            if store_fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return store_fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=fulltext_when)
265
0.64.27 by Ian Clatworthy
1st cut at performance tuning
266
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        Any exception is re-raised after the open write group (if there
        is one) has been aborted.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: whatever went wrong, abort the pending
            # write group and then let the exception propagate.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
274
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
275
    def post_process(self):
        """Finish the import once all commands have been handled.

        Commits the write group, saves the id-map, exports marks if
        'export-marks' was given, updates branches, optionally updates
        working trees (the 'trees' parameter), repacks the repository
        if autopacking was disabled earlier, and reports statistics.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            marks_file.export_marks(self.params.get("export-marks"),
                self.cache_mgr.revision_ids)

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # Bind 'reporter' on both paths: the loop below passes it
                # to every tree update. (The verbose path used to bind a
                # different name, causing a NameError.)
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
341
0.64.167 by Ian Clatworthy
incremental packing for chk formats
342
    def _pack_repository(self, final=True):
        """Pack the repository and delete the resulting obsolete packs.

        :param final: if True, groupcompress (when available) is switched
            back from its fast setting before packing; if False, memory
            is collected again after packing.
        """
        import gc

        # Before packing, free whatever memory we can and ensure
        # that groupcompress is configured to optimise disk space
        if final:
            try:
                from bzrlib import groupcompress
                groupcompress._FAST = False
            except ImportError:
                pass
        gc.collect()

        self.note("Packing repository ...")
        self.repo.pack()

        # To be conservative, packing puts the old packs and
        # indices in obsolete_packs. We err on the side of
        # optimism and clear out that directory to save space.
        self.note("Removing obsolete packs ...")
        # TODO: Use a public API for this once one exists
        pack_transport = self.repo._pack_collection.transport
        obsolete_dir = pack_transport.clone('obsolete_packs')
        stale_names = pack_transport.list_dir('obsolete_packs')
        obsolete_dir.delete_multi(stale_names)

        # If we're not done, free whatever memory we can
        if not final:
            gc.collect()
369
0.64.41 by Ian Clatworthy
update multiple working trees if requested
370
    def _get_working_trees(self, branches):
        """Collect the working trees belonging to the given branches.

        :param branches: iterable of branches to look up trees for
        :return: list of working trees; branches without an available
            tree are left out
        """
        trees = []
        expect_trees = self.repo.make_working_trees()
        for candidate in branches:
            if candidate == self.branch and candidate is not None:
                # The processor's own branch: use the tree it already holds
                trees.append(self.working_tree)
                continue
            if not expect_trees:
                continue
            try:
                tree = candidate.bzrdir.open_workingtree()
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", candidate)
            else:
                trees.append(tree)
        return trees
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
387
388
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped
        (skip_total); the elapsed time is measured from _start_time.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        pluralise = helpers.single_plural
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, pluralise(revisions, "revision", "revisions"),
            branches, pluralise(branches, "branch", "branches"),
            trees, pluralise(trees, "tree", "trees"),
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
398
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
399
    def _init_id_map(self):
        """Load the id-map into the cache manager and sanity-check its size.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        revision_ids, known_count = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = revision_ids
        repo_count = len(self.repo.all_revision_ids())
        if known_count > repo_count:
            raise plugin_errors.BadRepositorySize(known_count, repo_count)
        return known_count
413
414
    def _save_id_map(self):
        """Save the id-map held by the cache manager."""
        # The entire map is rewritten on every call. Appending only the
        # new entries is a possible optimisation if this gets too slow.
        path = self.id_map_path
        revision_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(path, revision_ids)
419
0.64.5 by Ian Clatworthy
first cut at generic processing method
420
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        A blob with a mark is stored under ``cmd.id``; a blob without
        one is stored under a SHA computed from its data.
        """
        if cmd.mark is None:
            blob_key = osutils.sha_strings(cmd.data)
        else:
            blob_key = cmd.id
        self.cache_mgr.store_blob(blob_key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
427
0.64.170 by Ian Clatworthy
add autopack option to fast-import
428
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        ``cmd`` itself is not inspected.
        """
        # Close off the current write group and persist the id-map
        # before doing anything else.
        self.repo.commit_write_group()
        self._save_id_map()
        completed = self.checkpoint_count + 1
        self.checkpoint_count = completed
        # Every autopack_every-th checkpoint also requests a non-final pack.
        if not completed % self.autopack_every:
            self._pack_repository(final=False)
        # Open a fresh write group for whatever comes next.
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
437
438
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is skipped: heads are tracked, its file commands are consumed and
        the revision count is bumped, but nothing is committed. Otherwise
        the commit is processed by a handler built by
        ``commit_handler_factory``, after which the import may be marked
        finished or an automatic checkpoint may be triggered.

        :param cmd: the commit command to process
        :raises BadRestart: if a commit being skipped has an id that is
            not present in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.track_heads(cmd)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for _fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # The value returned by track_heads was never used here, so
            # it is not kept.
            self.cache_mgr.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose)
        try:
            handler.process()
        except:
            # Deliberately bare: whatever went wrong, say which commit
            # triggered it and then re-raise unchanged. The parenthesised
            # single argument prints the same text whether print is a
            # statement or a function.
            print("ABORT: exception occurred processing commit %s"
                % (cmd.id,))
            raise
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % (cmd.id,))

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
476
0.64.25 by Ian Clatworthy
slightly better progress reporting
477
    def report_progress(self, details=''):
        """Note how many commits have been processed, and at what rate.

        Nothing is reported unless the revision count is a multiple of
        ``progress_every``. The rate covers only the revisions added by
        this run (the revision count less ``skip_total``) and is left
        out of the message when no positive amount of time has elapsed
        since ``_start_time``.

        :param details: extra text appended to the end of the message
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        elapsed = time.time() - self._start_time
        if elapsed > 0:
            minutes = elapsed / 60.0
            revisions_added = self._revision_count - self.skip_total
            rate = revisions_added / minutes
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # A coarse clock can report zero elapsed time, and a clock
            # adjustment can make it negative. Either way a rate is
            # meaningless, and dividing by zero would abort the import.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
491
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
492
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # A progress bar would be another way to surface this.
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
496
497
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a tag when it has a from
        clause; without one it is ignored, with a warning in verbose
        mode only. A reset of any other ref with a from clause updates
        head tracking for that ref.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            if cmd.from_ is not None:
                self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if cmd.from_ is not None:
            self._set_tag(tag_name, cmd.from_)
            return
        if self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
510
511
    def tag_handler(self, cmd):
        """Process a TagCommand.

        A tag without a from clause cannot be resolved, so it is
        skipped with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
517
518
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
519
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
520
        bzr_tag_name = name.decode('utf-8', 'replace')
521
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
522
        self.tags[bzr_tag_name] = bzr_rev_id