/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
22
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.51 by Ian Clatworthy
disable autopacking
28
from bzrlib.repofmt import pack_repo
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
29
from bzrlib.trace import note
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
30
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
31
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
32
    branch_updater,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
33
    bzr_commit_handler,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
34
    cache_manager,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
35
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
36
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
37
    idmapfile,
0.78.5 by Ian Clatworthy
move import/export of marks into a module
38
    marks_file,
0.64.5 by Ian Clatworthy
first cut at generic processing method
39
    processor,
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
40
    revision_store,
0.64.5 by Ian Clatworthy
first cut at generic processing method
41
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
42
43
0.64.41 by Ian Clatworthy
update multiple working trees if requested
44
# How many commits before automatically reporting progress
_DEFAULT_AUTO_PROGRESS = 1000

# How many commits before automatically checkpointing
_DEFAULT_AUTO_CHECKPOINT = 10000

# How many inventories to cache. The first value is the general default;
# the second is used when the repository format supports CHKs.
_DEFAULT_INV_CACHE_SIZE = 10
_DEFAULT_CHK_INV_CACHE_SIZE = 100
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
55
class GenericProcessor(processor.ImportProcessor):
56
    """An import processor that handles basic imports.
57
58
    Current features supported:
59
0.64.16 by Ian Clatworthy
safe processing tweaks
60
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
61
    * commits of files and symlinks are supported
62
    * checkpoints automatically happen at a configurable frequency
63
      over and above the stream requested checkpoints
64
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
65
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
66
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
67
    At checkpoints and on completion, the commit-id -> revision-id map is
68
    saved to a file called 'fastimport-id-map'. If the import crashes
69
    or is interrupted, it can be started again and this file will be
70
    used to skip over already loaded revisions. The format of each line
71
    is "commit-id revision-id" so commit-ids cannot include spaces.
72
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
73
    Here are the supported parameters:
74
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
75
    * info - name of a hints file holding the analysis generated
76
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
77
      importing large repositories, this parameter is needed so
78
      that the importer knows what blobs to intelligently cache.
79
0.64.41 by Ian Clatworthy
update multiple working trees if requested
80
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
81
      By default, the importer updates the repository
82
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
83
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
84
85
    * checkpoint - automatically checkpoint every n commits over and
86
      above any checkpoints contained in the import stream.
87
      The default is 10000.
88
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
89
    * count - only import this many commits then exit. If not set
90
      or negative, all commits are imported.
91
    
92
    * inv-cache - number of inventories to cache.
0.64.149 by Ian Clatworthy
larger default inventory cache for chk formats
93
      If not set, the default is 100 for CHK formats and 10 otherwise.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
94
95
    * experimental - enable experimental mode, i.e. use features
96
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
97
98
    * import-marks - name of the file to load mark information from
99
100
    * export-marks - name of the file to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
101
    """
102
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
103
    # Parameter names understood by this processor. Each of them is
    # described in the class docstring.
    known_params = [
        'info', 'trees', 'checkpoint', 'count',
        'inv-cache', 'experimental',
        'import-marks', 'export-marks',
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
113
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
114
    def pre_process(self):
        """Initialise the processor state used by the rest of the import.

        In order, this records the start time, loads the parameters,
        creates the cache manager, restores previously known revision-ids
        (from the 'import-marks' file if that parameter is given, otherwise
        from the id-map), creates the revision store, disables autopacking
        on KnitPack repositories, switches groupcompress to its fast mode
        when that plugin can be imported, and finally starts a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = cache_manager.CacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: fall back to the id-map, which tells us how
            # many commits were already loaded by an earlier run.
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            mark_info = marks_file.import_marks(marks_path)
            if mark_info is not None:
                self.cache_mgr.revision_ids = mark_info[0]
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision store to use for committing, if any
        self.rev_store = self._revision_store_factory()

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            # Remember the original so that later code can tell that
            # autopacking was switched off.
            self._original_max_pack_count = pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                # Always report a limit above the current revision count
                return total_revisions + 1

            pack_collection._max_pack_count = _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Make groupcompress use the fast algorithm during importing.
        # We want to repack at the end anyhow when more information
        # is available to do a better job of saving space.
        try:
            from bzrlib.plugins.groupcompress import groupcompress
        except ImportError:
            pass
        else:
            groupcompress._FAST = True

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
164
165
    def _load_info_and_params(self):
        """Read the parameters and derive the settings used by the import.

        The following attributes are set from ``self.params``,
        ``self.repo`` and ``self.verbose``:

        * ``_experimental`` - truth value of the 'experimental' parameter
        * ``id_map_path`` - local path of the 'fastimport-id-map' file
        * ``info`` - ConfigObj loaded from the 'info' parameter, or None
        * ``supports_chk`` and ``commit_handler_factory``
        * ``progress_every`` and ``checkpoint_every``
        * ``inventory_cache_size``
        * ``max_commits`` and ``total_commits`` (None means no limit/unknown)

        :raises ValueError: if 'checkpoint', 'inv-cache' or 'count' is
            given but cannot be converted by int().
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk:
            self.commit_handler_factory = \
                bzr_commit_handler.CHKInventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer even when true
            # division is in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache. -1 means "not set",
        # in which case the default depends on the repository format.
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            # Never report progress against more commits than will be
            # imported.
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
226
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
227
    def _revision_store_factory(self):
        """Make a RevisionStore based on what the repository supports.

        A repository with a ``revisions`` attribute gets a RevisionStore2.
        Otherwise a RevisionStore1 is used, unless experimental mode is
        on, in which case an ImportRevisionStore1 is returned.
        """
        if hasattr(self.repo, 'revisions'):
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def fulltext_when(count):
            # Store a fulltext for the final commit (when the total is
            # known) and otherwise for every 200th revision.
            total = self.total_commits
            is_last = total is not None and count == total
            fulltext = is_last or count % 200 == 0
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=fulltext_when)
250
0.64.27 by Ian Clatworthy
1st cut at performance tuning
251
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        Whatever is raised by the base implementation is re-raised after
        any write group still open on the repository has been aborted.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately a bare except: the write group must be aborted
            # whatever went wrong, and the error is re-raised unchanged.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
259
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
260
    def post_process(self):
        """Finish the import: commit, update branches/trees and report.

        The steps are: commit the write group and save the id-map, export
        marks if 'export-marks' was given, update the branches (warning
        about heads for which no branch was created), optionally update
        the working trees ('trees' parameter), dump statistics, repack the
        repository if autopacking was disabled earlier, and finally remind
        the user about 'bzr update' when trees were not refreshed.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            marks_file.export_marks(self.params.get("export-marks"),
                self.cache_mgr.revision_ids)

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound on both paths: it is passed
                # to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self._pack_repository()

        # Finish up by telling the user what to do next.
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
322
0.64.167 by Ian Clatworthy
incremental packing for chk formats
323
    def _pack_repository(self, final=True):
        """Pack the repository and delete the resulting obsolete packs.

        :param final: if True, all caches are cleared first and
            groupcompress (when importable) is taken out of fast mode
            before packing. If False, a further garbage collection is
            run after packing instead.
        """
        import gc

        # Before packing, free whatever memory we can and ensure
        # that groupcompress is configured to optimise disk space
        if final:
            self.cache_mgr.clear_all()
            try:
                from bzrlib.plugins.groupcompress import groupcompress
            except ImportError:
                groupcompress = None
            if groupcompress is not None:
                groupcompress._FAST = False
        gc.collect()

        self.note("Packing repository ...")
        self.repo.pack()

        # To be conservative, packing puts the old packs and
        # indices in obsolete_packs. We err on the side of
        # optimism and clear out that directory to save space.
        self.note("Removing obsolete packs ...")
        # TODO: Use a public API for this once one exists
        repo_transport = self.repo._pack_collection.transport
        obsolete_dir = repo_transport.clone('obsolete_packs')
        stale_names = repo_transport.list_dir('obsolete_packs')
        obsolete_dir.delete_multi(stale_names)

        # If we're not done, free whatever memory we can
        if not final:
            gc.collect()
351
0.64.41 by Ian Clatworthy
update multiple working trees if requested
352
    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to look up working trees for
        :return: a list of working trees. Branches without one are left
            out, as are all branches other than ``self.branch`` when the
            repository does not make working trees.
        """
        trees = []
        trees_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                # The processor's own branch: use the tree we already hold
                trees.append(self.working_tree)
                continue
            if not trees_expected:
                continue
            try:
                wt = br.bzrdir.open_workingtree()
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", br)
            else:
                trees.append(wt)
        return trees
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
369
370
    def dump_stats(self):
        """Report what was imported and how long it took.

        The revision count excludes the commits that were skipped. In
        verbose mode the cache manager's statistics are dumped as well.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(
            revisions, "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
        if self.verbose:
            self.cache_mgr.dump_stats()
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
382
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
383
    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        The loaded map is stored on the cache manager as ``revision_ids``.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the id-map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        revision_ids, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = revision_ids
        existing_count = len(self.repo.all_revision_ids())
        # The repository is allowed to hold more revisions than the map,
        # but not fewer.
        if known > existing_count:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
397
398
    def _save_id_map(self):
        """Write the cache manager's id-map out to ``id_map_path``."""
        # The complete map is rewritten on every save. Should that prove
        # to be a problem, appending just the new entries is an option.
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, id_map)
403
0.64.5 by Ian Clatworthy
first cut at generic processing method
404
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        A blob without a mark is keyed by the SHA of its data; a marked
        blob is keyed by the command's id.
        """
        if cmd.mark is None:
            blob_key = osutils.sha_strings(cmd.data)
        else:
            blob_key = cmd.id
        self.cache_mgr.store_blob(blob_key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
411
0.64.167 by Ian Clatworthy
incremental packing for chk formats
412
    def checkpoint_handler(self, cmd, pack_repo=False):
        """Process a CheckpointCommand.

        :param cmd: the command being processed; it is not inspected here
        :param pack_repo: if True, run a non-final repository pack
            between closing the old write group and opening the new one
        """
        # Flush everything imported so far: close the current write group
        # and persist the id-map alongside it.
        self.repo.commit_write_group()
        self._save_id_map()

        # Optionally pack before any new data is written.
        if pack_repo:
            self._pack_repository(final=False)

        # Open a fresh write group for whatever comes next.
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
420
421
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is only replayed for bookkeeping: heads are tracked, the commit-id
        is checked against the id-map and the file commands are drained.
        Otherwise the commit is handed to a handler built by
        ``commit_handler_factory``, its revision-id is recorded in the
        id-map and progress is reported.

        :param cmd: the commit command to process
        :raises BadRestart: if a commit being skipped is missing from
            the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.track_heads(cmd)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for _ in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # Called for its effect on the cache manager; the returned
            # value is not needed here.
            self.cache_mgr.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None, pack_repo=self.supports_chk)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
455
0.64.25 by Ian Clatworthy
slightly better progress reporting
456
    def report_progress(self, details=''):
        """Note how many commits have been processed and at what rate.

        Nothing is reported unless the revision count is a multiple of
        ``progress_every``.

        :param details: extra text appended to the end of the message
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        if minutes > 0:
            revisions_added = self._revision_count - self.skip_total
            rate = revisions_added * 1.0 / minutes
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # No measurable time has elapsed (coarse clock resolution or a
            # clock that stepped backwards): a rate cannot be computed, so
            # leave it out rather than divide by zero or report nonsense.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
470
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
471
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # A progress bar could be driven from here instead.
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
475
476
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a lightweight tag when it
        has a from clause; without one it is ignored, with a warning in
        verbose mode only. A reset of any other ref that has a from
        clause updates the tracked heads for that ref.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            if cmd.from_ is not None:
                self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if cmd.from_ is None:
            if self.verbose:
                self.warning("ignoring reset refs/tags/%s - no from clause"
                    % tag_name)
            return
        self._set_tag(tag_name, cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
489
490
    def tag_handler(self, cmd):
        """Process a TagCommand.

        A tag with a from clause is defined via ``_set_tag``; one without
        is ignored and a warning is issued.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
496
497
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
498
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
499
        bzr_tag_name = name.decode('utf-8', 'replace')
500
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
501
        self.tags[bzr_tag_name] = bzr_rev_id