/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
22
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
26
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    )
0.64.313 by Jelmer Vernooij
Support both locations for KnitPackRepository.
28
try:
29
    from bzrlib.repofmt.knitpack_repo import KnitPackRepository
30
except ImportError:
31
    from bzrlib.repofmt.pack_repo import KnitPackRepository
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
32
from bzrlib.trace import (
33
    mutter,
34
    note,
35
    warning,
36
    )
0.118.1 by Jelmer Vernooij
Cope with Debian's bzr using the system configobj, not shipping with its own.
37
try:
38
    import bzrlib.util.configobj.configobj as configobj
39
except ImportError:
40
    import configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
41
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
42
    branch_updater,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
43
    cache_manager,
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
44
    marks_file,
45
    revision_store,
46
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
47
from fastimport import (
0.102.13 by Ian Clatworthy
Fix feature checking
48
    commands,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
49
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
50
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
51
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
52
    processor,
53
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
54
55
0.64.41 by Ian Clatworthy
update multiple working trees if requested
56
# How many commits before automatically reporting progress
57
_DEFAULT_AUTO_PROGRESS = 1000
58
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
59
# How many commits before automatically checkpointing
60
_DEFAULT_AUTO_CHECKPOINT = 10000
61
0.64.170 by Ian Clatworthy
add autopack option to fast-import
62
# How many checkpoints before automatically packing
63
_DEFAULT_AUTO_PACK = 4
64
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
65
# How many inventories to cache
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
66
_DEFAULT_INV_CACHE_SIZE = 1
67
_DEFAULT_CHK_INV_CACHE_SIZE = 1
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
68
0.64.41 by Ian Clatworthy
update multiple working trees if requested
69
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
70
class GenericProcessor(processor.ImportProcessor):
71
    """An import processor that handles basic imports.
72
73
    Current features supported:
74
0.64.16 by Ian Clatworthy
safe processing tweaks
75
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
76
    * files and symlinks commits are supported
77
    * checkpoints automatically happen at a configurable frequency
78
      over and above the stream requested checkpoints
79
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
80
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
81
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
82
    At checkpoints and on completion, the commit-id -> revision-id map is
83
    saved to a file called 'fastimport-id-map'. If the import crashes
84
    or is interrupted, it can be started again and this file will be
85
    used to skip over already loaded revisions. The format of each line
86
    is "commit-id revision-id" so commit-ids cannot include spaces.
87
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
88
    Here are the supported parameters:
89
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
90
    * info - name of a hints file holding the analysis generated
91
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
92
      importing large repositories, this parameter is needed so
93
      that the importer knows what blobs to intelligently cache.
94
0.64.41 by Ian Clatworthy
update multiple working trees if requested
95
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
96
      By default, the importer updates the repository
97
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
98
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
99
0.64.170 by Ian Clatworthy
add autopack option to fast-import
100
    * count - only import this many commits then exit. If not set
101
      or negative, all commits are imported.
102
    
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
103
    * checkpoint - automatically checkpoint every n commits over and
104
      above any checkpoints contained in the import stream.
105
      The default is 10000.
106
0.64.170 by Ian Clatworthy
add autopack option to fast-import
107
    * autopack - pack every n checkpoints. The default is 4.
108
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
109
    * inv-cache - number of inventories to cache.
0.64.254 by Ian Clatworthy
Change the default inventory cache size to 1. For large projects, this reduces memory overhead and also speeds up conversion.
110
      If not set, the default is 1.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
111
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
112
    * mode - import algorithm to use: default, experimental or classic.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
113
114
    * import-marks - name of file to read to load mark information from
115
116
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
117
    """
118
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
119
    known_params = [
120
        'info',
121
        'trees',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
122
        'count',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
123
        'checkpoint',
0.64.170 by Ian Clatworthy
add autopack option to fast-import
124
        'autopack',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
125
        'inv-cache',
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
126
        'mode',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
127
        'import-marks',
128
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
129
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
130
0.64.215 by Ian Clatworthy
tweak GenericProcessor __init__ method
131
    def __init__(self, bzrdir, params=None, verbose=False, outf=None,
            prune_empty_dirs=True):
        """Set up the processor against a control directory.

        :param bzrdir: control directory to import into. It is probed
            for a working tree and branch; if it is not a branch, its
            repository is opened instead.
        :param params: processor parameters, handed to the base class
        :param verbose: verbosity flag, handed to the base class
        :param outf: accepted but not used by this method
        :param prune_empty_dirs: stored as self.prune_empty_dirs
        """
        processor.ImportProcessor.__init__(self, params, verbose)
        self.bzrdir = bzrdir
        self.prune_empty_dirs = prune_empty_dirs
        try:
            # The common case: the control directory holds a branch
            tree, branch = bzrdir._get_tree_branch()
            repo = branch.repository
        except errors.NotBranchError:
            # No branch here, so it has to be a bare repository
            tree = None
            branch = None
            repo = bzrdir.open_repository()
        self.working_tree = tree
        self.branch = branch
        self.repo = repo
0.64.196 by Ian Clatworthy
get tests passing again
145
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
146
    def pre_process(self):
        """Prepare state for an import and open the first write group.

        Loads the parameters, creates the cache manager, loads marks
        (from the 'import-marks' file when given, otherwise from the
        id-map), creates the revision store, switches off autopacking
        on KnitPack repositories and starts a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()

        commit_total = self.total_commits
        if commit_total:
            self.note("Starting import of %d commits ..." % (commit_total,))
        else:
            self.note("Starting import ...")

        self.cache_mgr = cache_manager.CacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: fall back to the id-map, which also tells
            # us how many commits have been loaded already
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            loaded_marks = marks_file.import_marks(marks_path)
            if loaded_marks is not None:
                self.cache_mgr.marks = loaded_marks
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # tag name -> revision_id
        self.tags = {}

        # The revision store used for committing, if any
        self.rev_store = self._revision_store_factory()

        # Switch off autopacking where the repository format has it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, KnitPackRepository):
            pack_collection = self.repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count

            def _import_max_pack_count(total_revisions):
                return total_revisions + 1

            pack_collection._max_pack_count = _import_max_pack_count
        else:
            self._original_max_pack_count = None

        # Use the fast groupcompress algorithm while importing. A
        # repack happens at the end anyhow, when more information is
        # available to do a better job of saving space.
        try:
            from bzrlib import groupcompress
        except ImportError:
            pass
        else:
            groupcompress._FAST = True

        # Open a write group. It is committed at the end of the import;
        # checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
201
202
    def _load_info_and_params(self):
        """Read the processor parameters and derive the import settings.

        Sets, among others: _mode, _experimental, id_map_path, info,
        supports_chk, commit_handler_factory, progress_every,
        checkpoint_every, checkpoint_count, autopack_every,
        inventory_cache_size, max_commits and total_commits.
        """
        from bzrlib.plugins.fastimport import bzr_commit_handler

        # Keep the mode as the string that was passed in. Wrapping it
        # in bool() would make every comparison against 'experimental'
        # or 'classic' below fail.
        mode = self.params.get('mode')
        if mode is None:
            mode = 'default'
        self._mode = mode
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks', False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                 " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet). Floor division keeps this an integer.
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every // 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache; -1 means that
        # 'inv-cache' was not given and the format default applies
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
274
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
275
    def _revision_store_factory(self):
        """Build the RevisionStore variant that suits the repository.

        Repositories exposing a 'revisions' attribute get a
        RevisionStore2. Others get a RevisionStore1, or an
        ImportRevisionStore1 when experimental mode is on.
        """
        if hasattr(self.repo, 'revisions'):
            return revision_store.RevisionStore2(self.repo)
        if not self._experimental:
            return revision_store.RevisionStore1(self.repo)

        def _wants_fulltext(count):
            # A full-text inventory is stored for the final commit
            # (when the total is known) and for every 200th revision
            expected_total = self.total_commits
            is_last = expected_total is not None and count == expected_total
            fulltext = is_last or count % 200 == 0
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        return revision_store.ImportRevisionStore1(
            self.repo, self.inventory_cache_size,
            fulltext_when=_wants_fulltext)
298
0.123.5 by Jelmer Vernooij
Fix typo, handle bzr-specific locking in GenericProcessor.
299
    def process(self, command_iter):
        """Import data into Bazaar by processing a stream of commands.

        A write lock is taken on the working tree if there is one, else
        on the branch, else on the repository. It is released once
        processing ends, after any write group that is still open has
        been aborted.

        :param command_iter: an iterator providing commands
        """
        def _lock_target():
            # Preference order: working tree, branch, repository
            for candidate in (self.working_tree, self.branch, self.repo):
                if candidate is not None:
                    return candidate
            return None

        target = _lock_target()
        if target is not None:
            target.lock_write()
        try:
            super(GenericProcessor, self)._process(command_iter)
        finally:
            # If an unhandled exception occurred, abort the write group
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            # Release the lock
            target = _lock_target()
            if target is not None:
                target.unlock()
323
0.64.27 by Ian Clatworthy
1st cut at performance tuning
324
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        :param command_iter: an iterator providing commands
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, do not leave a write group open;
            # the exception itself is passed on unchanged
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
332
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
333
    def post_process(self):
        """Finish the import: commit, update branches and trees, report.

        Commits the open write group, saves the id-map and, when the
        'export-marks' parameter is set, exports the marks. If the ref
        tracker recorded no last ref, the method returns at that point.
        Otherwise branches are updated, working trees are updated (all
        of them when the 'trees' parameter is set, else only the one
        for the first updated branch), the repository is packed if
        autopacking was disabled earlier, and statistics are reported.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        export_path = self.params.get("export-marks")
        if export_path is not None:
            marks_file.export_marks(export_path, self.cache_mgr.marks)

        # Identity test: '== None' would go through any custom __eq__
        if self.cache_mgr.reftracker.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(
                self.cache_mgr.reftracker.heads),
            self.cache_mgr.reftracker.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                # Other branches, if any, still need a manual update
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for other branches, "
                "use 'bzr update' inside that branch.")
405
406
    def _update_working_trees(self, trees):
        """Update each given working tree, counting them in _tree_count.

        :param trees: the working trees to update
        """
        # Changes are only reported in verbose mode
        reporter = None
        if self.verbose:
            reporter = delta._ChangeReporter()
        for tree in trees:
            self.note("Updating the working tree for %s ...", tree.basedir)
            tree.update(reporter)
            self._tree_count += 1
0.64.41 by Ian Clatworthy
update multiple working trees if requested
415
0.64.167 by Ian Clatworthy
incremental packing for chk formats
416
    def _pack_repository(self, final=True):
        """Pack the repository and empty its obsolete_packs directory.

        :param final: when true, groupcompress (if importable) is taken
            out of fast mode before packing; when false, a further
            garbage collection is run after packing
        """
        import gc

        # Before packing, ensure that groupcompress is configured to
        # optimise disk space and free whatever memory we can
        if final:
            try:
                from bzrlib import groupcompress
            except ImportError:
                pass
            else:
                groupcompress._FAST = False
        gc.collect()

        self.note("Packing repository ...")
        self.repo.pack()

        # To be conservative, packing puts the old packs and
        # indices in obsolete_packs. We err on the side of
        # optimism and clear out that directory to save space.
        self.note("Removing obsolete packs ...")
        # TODO: Use a public API for this once one exists
        pack_transport = self.repo._pack_collection.transport
        obsolete_transport = pack_transport.clone('obsolete_packs')
        obsolete_names = pack_transport.list_dir('obsolete_packs')
        obsolete_transport.delete_multi(obsolete_names)

        # More work follows a non-final pack, so free memory again
        if not final:
            gc.collect()
443
0.64.41 by Ian Clatworthy
update multiple working trees if requested
444
    def _get_working_trees(self, branches):
        """Collect the working trees that belong to the given branches.

        :param branches: branches to look at; None entries are ignored
        :return: a list of working trees
        """
        found = []
        trees_expected = self.repo.make_working_trees()
        for candidate in branches:
            if candidate is None:
                continue
            if candidate == self.branch:
                # Our own branch: reuse the tree found at construction
                if self.working_tree:
                    found.append(self.working_tree)
                continue
            if not trees_expected:
                continue
            try:
                found.append(candidate.bzrdir.open_workingtree())
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", candidate)
        return found
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
460
461
    def dump_stats(self):
        """Note how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped,
        and the message ends with the time elapsed since the start.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(
            revisions, "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
471
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
472
    def _init_id_map(self):
        """Load the id-map and check it against the repository.

        :return: the count reported by the id-map loader
        :raises BadRepositorySize: if the repository holds fewer
            revisions than that count
        """
        # Only the size is checked for now. A more paranoid check
        # would compare the revision-ids as well.
        loaded = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.marks, known = loaded
        in_repo = len(self.repo.all_revision_ids())
        if known > in_repo:
            raise plugin_errors.BadRepositorySize(known, in_repo)
        return known
486
487
    def _save_id_map(self):
        """Write the cache manager's marks to ``self.id_map_path``."""
        # The complete map is written on every call. Should that become a
        # bottleneck, this could switch to appending only the new entries.
        destination = self.id_map_path
        marks = self.cache_mgr.marks
        idmapfile.save_id_map(destination, marks)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
492
0.64.5 by Ian Clatworthy
first cut at generic processing method
493
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is handed to the cache manager, keyed by the command's
        id when it carries a mark, and otherwise by the value that
        ``osutils.sha_strings`` returns for the data.
        """
        if cmd.mark is None:
            # Unmarked blob: derive the key from the content itself.
            key = osutils.sha_strings(cmd.data)
        else:
            key = cmd.id
        self.cache_mgr.store_blob(key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
500
0.64.170 by Ian Clatworthy
add autopack option to fast-import
501
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the checkpoint command, or None when the checkpoint was
            triggered automatically rather than by the input stream.
        """
        # Close the current write group and persist the id-map; a fresh
        # write group is opened again at the end.
        self.repo.commit_write_group()
        self._save_id_map()
        automatic = cmd is None
        if automatic:
            # Only automatic checkpoints count towards auto-packing.
            self.checkpoint_count += 1
            pack_due = self.checkpoint_count % self.autopack_every == 0
            if pack_due:
                self._pack_repository(final=False)
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
512
513
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While resuming (fewer than ``skip_total`` commits seen so far) the
        commit is not imported again: its heads are tracked, its mark is
        checked against the loaded id-map, the blob cache is reset and a
        ``refs/tags/`` ref is recorded as a tag.

        Otherwise the commit is processed by a handler obtained from
        ``self.commit_handler_factory``, its mark is recorded, progress is
        reported, and then either the import is flagged as finished
        (``max_commits`` reached) or an automatic checkpoint is run every
        ``checkpoint_every`` commits.

        :param cmd: the commit command; its ``id`` and ``ref`` are read here.
        :raises plugin_errors.BadRestart: if a skipped commit's mark is not
            present in the id-map.
        """
        tag_prefix = 'refs/tags/'
        # Marks are stored without the leading ':' used in the stream.
        mark = cmd.id.lstrip(':')
        if self.skip_total and self._revision_count < self.skip_total:
            self.cache_mgr.reftracker.track_heads(cmd)
            # Check that we really do know about this commit-id
            if mark not in self.cache_mgr.marks:
                raise plugin_errors.BadRestart(mark)
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            if cmd.ref.startswith(tag_prefix):
                tag_name = cmd.ref[len(tag_prefix):]
                self._set_tag(tag_name, cmd.id)
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            # Called for its head-tracking effect; the result is not needed.
            self.cache_mgr.reftracker.track_heads(cmd)

        # 'Commit' the revision and report progress
        handler = self.commit_handler_factory(cmd, self.cache_mgr,
            self.rev_store, verbose=self.verbose,
            prune_empty_dirs=self.prune_empty_dirs)
        try:
            handler.process()
        except:
            # Deliberately catches everything (including interrupts): the
            # offending commit is reported and the exception re-raised.
            print("ABORT: exception occurred processing commit %s" % (cmd.id))
            raise
        self.cache_mgr.add_mark(mark, handler.revision_id)
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        if cmd.ref.startswith(tag_prefix):
            tag_name = cmd.ref[len(tag_prefix):]
            self._set_tag(tag_name, cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
557
0.64.25 by Ian Clatworthy
slightly better progress reporting
558
    def report_progress(self, details=''):
        """Note the commit count and import rate every ``progress_every`` commits.

        Nothing is emitted unless the revision counter is a multiple of
        ``progress_every``. The rate covers only revisions added by this run
        (the counter minus ``skip_total``). It is left out of the message
        when no positive amount of time has elapsed since
        ``self._start_time``, because a rate cannot be computed then.

        :param details: extra text appended to the progress message.
        """
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        minutes = (time.time() - self._start_time) / 60
        revisions_added = self._revision_count - self.skip_total
        if minutes > 0:
            rate = revisions_added * 1.0 / minutes
            if rate > 10:
                rate_str = "at %.0f/minute " % rate
            else:
                rate_str = "at %.1f/minute " % rate
        else:
            # A coarse clock can report zero elapsed time; dividing by it
            # would raise ZeroDivisionError, so the rate is skipped instead.
            rate_str = ""
        self.note("%s commits processed %s%s" % (counts, rate_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
572
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
573
    def progress_handler(self, cmd):
        """Process a ProgressCommand.

        :param cmd: the progress command; its ``message`` is noted when in
            verbose mode.
        """
        # Progress messages embedded in streams are mostly noise, so they
        # are dropped unless verbose output was requested.
        if not self.verbose:
            return
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
579
580
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a tag when it has a from
        clause; without one it is ignored, with a warning in verbose mode.
        A reset of any other ref that has a from clause updates the tracked
        heads for that ref.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            if cmd.from_ is not None:
                self.cache_mgr.reftracker.track_heads_for_ref(
                    cmd.ref, cmd.from_)
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if cmd.from_ is not None:
            self._set_tag(tag_name, cmd.from_)
            return
        if self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
593
594
    def tag_handler(self, cmd):
        """Process a TagCommand.

        A tag without a from clause cannot be resolved to a revision, so it
        is skipped with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
600
601
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
602
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
603
        bzr_tag_name = name.decode('utf-8', 'replace')
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
604
        bzr_rev_id = self.cache_mgr.lookup_committish(from_)
0.64.11 by Ian Clatworthy
tag support
605
        self.tags[bzr_tag_name] = bzr_rev_id
0.102.9 by Ian Clatworthy
parsing of multiple authors and commit properties
606
607
    def feature_handler(self, cmd):
        """Process a FeatureCommand.

        :raises plugin_errors.UnknownFeature: if the requested feature is
            not listed in ``commands.FEATURE_NAMES``.
        """
        requested = cmd.feature_name
        if requested in commands.FEATURE_NAMES:
            return
        raise plugin_errors.UnknownFeature(requested)
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
612
0.64.297 by Jelmer Vernooij
Fix typo.
613
    def debug(self, msg, *args):
        """Output a timestamped debug message if the fast-import flag is set.

        Nothing is written unless "fast-import" is among the active debug
        flags.
        """
        if "fast-import" not in debug.debug_flags:
            return
        stamped = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(stamped, *args)
618
619
    def note(self, msg, *args):
        """Output a note, prefixed with the current time of day."""
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
623
624
    def warning(self, msg, *args):
        """Output a warning, prefixed with the current time of day."""
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)