/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.119 by Ian Clatworthy
fix missing os import in generic_processor
20
import os
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
37
from bzrlib.trace import note
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
38
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
39
from bzrlib.plugins.fastimport import (
0.78.4 by Ian Clatworthy
move GenericBranchUpdater into its own module
40
    branch_updater,
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
41
    cache_manager,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
42
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
44
    idmapfile,
0.78.5 by Ian Clatworthy
move import/export of marks into a module
45
    marks_file,
0.64.5 by Ian Clatworthy
first cut at generic processing method
46
    processor,
47
    revisionloader,
48
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
50
0.64.41 by Ian Clatworthy
update multiple working trees if requested
51
# Number of commits processed between automatic progress reports
_DEFAULT_AUTO_PROGRESS = 1000

# Number of commits processed between automatic checkpoints
_DEFAULT_AUTO_CHECKPOINT = 10000

# Number of inventories kept in the inventory cache
_DEFAULT_INV_CACHE_SIZE = 10
59
0.64.41 by Ian Clatworthy
update multiple working trees if requested
60
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
61
class GenericProcessor(processor.ImportProcessor):
62
    """An import processor that handles basic imports.
63
64
    Current features supported:
65
0.64.16 by Ian Clatworthy
safe processing tweaks
66
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
67
    * files and symlinks commits are supported
68
    * checkpoints automatically happen at a configurable frequency
69
      over and above the stream requested checkpoints
70
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
71
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
72
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
73
    At checkpoints and on completion, the commit-id -> revision-id map is
74
    saved to a file called 'fastimport-id-map'. If the import crashes
75
    or is interrupted, it can be started again and this file will be
76
    used to skip over already loaded revisions. The format of each line
77
    is "commit-id revision-id" so commit-ids cannot include spaces.
78
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
79
    Here are the supported parameters:
80
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
81
    * info - name of a hints file holding the analysis generated
82
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
83
      importing large repositories, this parameter is needed so
84
      that the importer knows what blobs to intelligently cache.
85
0.64.41 by Ian Clatworthy
update multiple working trees if requested
86
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      By default, the importer updates the repository
88
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
89
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
90
91
    * checkpoint - automatically checkpoint every n commits over and
92
      above any checkpoints contained in the import stream.
93
      The default is 10000.
94
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
95
    * count - only import this many commits then exit. If not set
96
      or negative, all commits are imported.
97
    
98
    * inv-cache - number of inventories to cache.
99
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
100
101
    * experimental - enable experimental mode, i.e. use features
102
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
103
104
    * import-marks - name of file to read to load mark information from
105
106
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
107
    """
108
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
109
    known_params = [
110
        'info',
111
        'trees',
112
        'checkpoint',
113
        'count',
114
        'inv-cache',
115
        'experimental',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
116
        'import-marks',
117
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
118
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
119
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
120
    def pre_process(self):
        """Prepare the processor before any import command is handled.

        Starts the timer, loads the parameters and hints, builds the cache
        manager, restores the commit-id -> revision-id map (from a marks
        file when 'import-marks' is given, otherwise from the id-map file),
        selects a revision loader, disables autopacking for pack
        repositories and opens the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = cache_manager.CacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: rely on the id-map so that commits loaded by
            # an earlier run can be skipped.
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            loaded_marks = marks_file.import_marks(marks_path)
            if loaded_marks is not None:
                self.cache_mgr.revision_ids = loaded_marks[0]
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if hasattr(self.repo, 'revisions'):
            # The repository exposes the newer API
            self.loader = revisionloader.RevisionLoader2(self.repo)
        elif self._experimental:
            def fulltext_when(count):
                # Store a fulltext for the final commit (when the total is
                # known) and otherwise for every 200th revision.
                is_last = (self.total_commits is not None
                    and count == self.total_commits)
                store_fulltext = is_last or count % 200 == 0
                if store_fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return store_fulltext

            self.loader = revisionloader.ImportRevisionLoader1(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)
        else:
            self.loader = revisionloader.RevisionLoader1(self.repo)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            # Remember the original hook; post_process checks it later.
            self._original_max_pack_count = pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1

            pack_collection._max_pack_count = _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
181
182
    def _load_info_and_params(self):
        """Read the processor parameters and the optional hints file.

        Sets the following attributes: ``_experimental``, ``id_map_path``,
        ``info`` (a ConfigObj or None), ``progress_every``,
        ``checkpoint_every``, ``inventory_cache_size``, ``max_commits``
        (None means import everything) and ``total_commits`` (None when
        the total is unknown).
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer even when true
            # division is in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(params['count'])
        except KeyError:
            self.max_commits = None
        else:
            # A negative count means "no limit"
            if self.max_commits < 0:
                self.max_commits = None

        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            # Never report a total larger than the number we will import
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
229
0.64.27 by Ian Clatworthy
1st cut at performance tuning
230
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        Any exception raised while processing is re-raised after the
        repository's open write group (if there is one) has been aborted.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # The bare except is deliberate: whatever went wrong, the write
            # group must not be left open. The exception is re-raised below.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
238
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
239
    def post_process(self):
        """Finish the import once every command has been handled.

        Commits the current write group and saves the id-map, exports the
        marks when the 'export-marks' parameter is set, updates the
        branches, optionally updates their working trees (the 'trees'
        parameter), reports statistics and, if autopacking was disabled
        and more commits than the checkpoint interval were processed,
        packs the repository and empties its obsolete_packs directory.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        export_path = self.params.get("export-marks")
        if export_path is not None:
            marks_file.export_marks(export_path, self.cache_mgr.revision_ids)

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # Only report individual changes in verbose mode. The name
                # must be 'reporter' in both branches: it is used below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
310
311
    def _get_working_trees(self, branches):
        """Collect the working trees belonging to the given branches.

        The processor's own branch maps to ``self.working_tree``. Other
        branches are only considered when the repository is configured to
        make working trees; those without one are reported with a warning
        and left out.

        :param branches: the branches to find working trees for
        :return: a list of working trees
        """
        trees = []
        repo_makes_trees = self.repo.make_working_trees()
        for br in branches:
            if br is not None and br == self.branch:
                trees.append(self.working_tree)
                continue
            if not repo_makes_trees:
                continue
            try:
                tree = br.bzrdir.open_workingtree()
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", br)
            else:
                trees.append(tree)
        return trees
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
328
329
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped because
        they had already been loaded (``skip_total``).
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(
            revisions, "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
339
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
340
    def _init_id_map(self):
        """Load the id-map and sanity-check it against the repository.

        The loaded map is stored on the cache manager. Only sizes are
        compared: the repository must hold at least as many revisions as
        the id-map records. Revision-ids themselves are not compared.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer revisions
            than the id-map records
        """
        id_map, known_count = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        repo_count = len(self.repo.all_revision_ids())
        if known_count > repo_count:
            raise plugin_errors.BadRepositorySize(known_count, repo_count)
        return known_count
354
355
    def _save_id_map(self):
        """Write the commit-id -> revision-id map to the id-map file."""
        # The complete map is rewritten on every call. Appending only the
        # new entries is a possible later optimisation.
        id_map_path = self.id_map_path
        revision_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(id_map_path, revision_ids)
360
0.64.5 by Ian Clatworthy
first cut at generic processing method
361
    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is stored in the cache manager under its id when it has a
        mark, otherwise under the SHA of its data.
        """
        if cmd.mark is None:
            blob_key = osutils.sha_strings(cmd.data)
        else:
            blob_key = cmd.id
        self.cache_mgr.store_blob(blob_key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
368
369
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        ``cmd`` is not inspected, so callers may pass None.
        """
        repo = self.repo
        # Close the current write group, persist the id-map, then open a
        # fresh write group for the commits that follow.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
375
376
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is treated as already loaded: heads are tracked, its file commands
        are consumed and it is counted, but nothing is committed. All
        other commits are committed via GenericCommitHandler. Afterwards
        progress is reported and the import is either flagged as finished
        (requested count reached) or automatically checkpointed.

        :raises BadRestart: if a commit being skipped is not in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return
        if self.first_incremental_commit:
            # First commit after importing marks: seed the file-ids cache
            # from this commit's parents. Done only once.
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A zero interval would make the modulo raise
            # ZeroDivisionError; it is treated as "no automatic
            # checkpoints" instead.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
417
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
418
    def _gen_file_ids_cache(self, revs=False):
        """Generate the file-id cache by searching repository inventories.

        :param revs: commit-ids whose inventories should be scanned. When
            false or empty, the currently tracked heads are used instead.
        """
        # Get the interesting revisions - the heads unless told otherwise
        commit_ids = revs or self.cache_mgr.heads.keys()
        # Resolve every commit-id before any inventory is loaded
        revision_ids = [self.cache_mgr.revision_ids[c] for c in commit_ids]

        # Rebuild the path -> file-id mapping from scratch
        path_to_file_id = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            path_to_file_id.update(
                (path, entry.file_id) for path, entry in inv.iter_entries())
        self.cache_mgr.file_ids = path_to_file_id
437
0.64.25 by Ian Clatworthy
slightly better progress reporting
438
    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        When the total number of commits is known, the note shows
        "done/total" and, if one can be computed, an ETA.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is None:
            counts = "%d" % (self._revision_count,)
            eta_str = ''
        else:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
            eta = progress.get_eta(self._start_time, self._revision_count,
                self.total_commits)
            eta_text = progress.str_tdelta(eta)
            # A result ending in '--' is shown as no ETA at all
            if eta_text.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_text
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
454
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
455
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # We could use a progress bar here instead
        message = "progress %s" % (cmd.message,)
        self.note(message)
0.64.5 by Ian Clatworthy
first cut at generic processing method
459
460
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ defines a lightweight tag when
        it has a from clause; without one it is ignored, with a warning in
        verbose mode. A reset of any other ref updates head tracking for
        that ref when a from clause is present.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            # FIXME: cmd.from_ is a committish and thus could reference
            # another branch. Create a method for resolving committishes.
            if cmd.from_ is not None:
                self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if cmd.from_ is not None:
            self._set_tag(tag_name, cmd.from_)
        elif self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
475
476
    def tag_handler(self, cmd):
        """Process a TagCommand.

        Tags without a from clause are ignored with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
482
483
    def _set_tag(self, name, from_):
        """Record a tag for the revision that an import reference maps to.

        :param name: the tag name as a UTF-8 encoded string; undecodable
            bytes are replaced
        :param from_: the commit-id the tag points at; it must already be
            present in the commit-id -> revision-id map
        """
        tag_name = name.decode('utf-8', 'replace')
        revision_id = self.cache_mgr.revision_ids[from_]
        self.tags[tag_name] = revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
488
489
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
490
def _track_heads(cmd, cache_mgr):
491
    """Track the repository heads given a CommitCommand.
492
    
493
    :return: the list of parents in terms of commit-ids
494
    """
495
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
496
    if cmd.from_ is not None:
497
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
498
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
499
        last_id = cache_mgr.last_ids.get(cmd.ref)
500
        if last_id is not None:
501
            parents = [last_id]
502
        else:
503
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
504
    parents.extend(cmd.merges)
0.64.109 by Ian Clatworthy
initial cut at reset support
505
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
506
    # Track the heads
0.64.109 by Ian Clatworthy
initial cut at reset support
507
    cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
508
    return parents
509
510
0.64.5 by Ian Clatworthy
first cut at generic processing method
511
class GenericCommitHandler(processor.CommitHandler):
512
0.64.48 by Ian Clatworthy
one revision loader instance
513
    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed on to the base class
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the cache manager shared across commits
        :param loader: the revision loader used to store the revision
        :param verbose: if True, report cache misses
        :param _experimental: flag enabling experimental behaviour
        """
        processor.CommitHandler.__init__(self, command)
        # Collaborators
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        # Behaviour switches
        self.verbose = verbose
        self._experimental = _experimental
0.64.5 by Ian Clatworthy
first cut at generic processing method
521
522
    def pre_process_files(self):
        """Prepare for committing.

        Picks the revision id, resets the per-commit text cache, resolves
        the parents to bzr revision-ids and seeds the working inventory
        from the first parent (or a fresh one when there is no parent).
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, keyed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads, then map the real parent commit-ids onto
        # bzr revision-ids (an empty parent list maps to an empty list)
        commit_parents = _track_heads(self.command, self.cache_mgr)
        self.parents = [self.cache_mgr.revision_ids[commit_id]
            for commit_id in commit_parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Seed the inventory from the previous one
        if self.parents:
            # the inventory cache is keyed by bzr revision-id
            basis = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = basis.copy()
        else:
            self.inventory = self.gen_initial_inventory()

        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id
        else:
            self.inventory.revision_id = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
560
0.64.14 by Ian Clatworthy
commit of modified files working
561
    def post_process_files(self):
        """Save the revision.

        The finished inventory is cached under the new revision id, a
        Revision object is built from the commit command, and both are
        handed to the loader.
        """
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Work out who committed and, if different, who authored
        identity = "%s <%s>"
        committer = self.command.committer
        who = identity % (committer[0], committer[1])
        properties = {}
        author = self.command.author
        if author is not None:
            author_id = identity % (author[0], author[1])
            if author_id != who:
                properties['author'] = author_id

        # Load the revision into the repository
        message = helpers.escape_commit_message(self.command.message)
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=message,
           revision_id=self.revision_id,
           properties=properties,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
585
0.64.5 by Ian Clatworthy
first cut at generic processing method
586
    def modify_handler(self, filecmd):
        """Apply a file-modify command to the inventory.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the data held on the command itself.
        """
        blob_ref = filecmd.dataref
        if blob_ref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(blob_ref)
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
594
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
595
    def _delete_recursive(self, path):
0.64.67 by James Westby
Add support for -Dfast-import.
596
        self.debug("deleting %s", path)
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
597
        fileid = self.bzr_file_id(path)
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
598
        dirname, basename = osutils.split(path)
599
        if (fileid in self.inventory and
600
            isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
601
            for child_path in self.inventory[fileid].children.keys():
0.64.123 by Ian Clatworthy
fix osutils call in delete_recursive
602
                self._delete_recursive(osutils.pathjoin(path, child_path))
0.64.21 by Ian Clatworthy
fix one inventory lookup bug
603
        try:
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
604
            if self.inventory.id2path(fileid) == path:
605
                del self.inventory[fileid]
606
            else:
607
                # already added by some other name?
608
                if dirname in self.cache_mgr.file_ids:
609
                    parent_id = self.cache_mgr.file_ids[dirname]
610
                    del self.inventory[parent_id].children[basename]
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
611
        except KeyError:
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
612
            self._warn_unless_in_merges(fileid, path)
0.64.21 by Ian Clatworthy
fix one inventory lookup bug
613
        except errors.NoSuchId:
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
614
            self._warn_unless_in_merges(fileid, path)
0.64.102 by Ian Clatworthy
Handle a directory becoming a file and subsequent child deletes
615
        except AttributeError, ex:
616
            if ex.args[0] == 'children':
617
                # A directory has changed into a file and then one
618
                # of it's children is being deleted!
619
                self._warn_unless_in_merges(fileid, path)
620
            else:
621
                raise
0.64.45 by Ian Clatworthy
fix compatibility with Python 2.4
622
        try:
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
623
            self.cache_mgr.delete_path(path)
0.64.45 by Ian Clatworthy
fix compatibility with Python 2.4
624
        except KeyError:
625
            pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
626
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
627
    def delete_handler(self, filecmd):
        """Delete the path named by a file-delete command."""
        doomed_path = filecmd.path
        self._delete_recursive(doomed_path)
629
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
630
    def _warn_unless_in_merges(self, fileid, path):
631
        if len(self.parents) <= 1:
632
            return
633
        for parent in self.parents[1:]:
634
            if fileid in self.get_inventory(parent):
635
                return
636
        self.warning("ignoring delete of %s as not in parent inventories", path)
637
0.64.5 by Ian Clatworthy
first cut at generic processing method
638
    def copy_handler(self, filecmd):
        """Copy a file or symlink to a new path within this commit.

        The copy is skipped with a warning when the commit has no parent
        revisions, when the source path is not in the inventory, or when
        the source is neither a file nor a symlink.

        :param filecmd: the copy command; its src_path and dest_path
          attributes name the source and the destination
        """
        src_path = filecmd.src_path
        dest_path = filecmd.dest_path
        self.debug("copying %s to %s", src_path, dest_path)
        if not self.parents:
            self.warning("ignoring copy of %s to %s - no parent revisions",
                src_path, dest_path)
            return
        file_id = self.inventory.path2id(src_path)
        if file_id is None:
            self.warning("ignoring copy of %s to %s - source does not exist",
                src_path, dest_path)
            return
        ie = self.inventory[file_id]
        kind = ie.kind
        if kind == 'file':
            # The text is read from the first parent revision
            content = self._get_content_from_repo(self.parents[0], file_id)
            self._modify_inventory(dest_path, kind, ie.executable, content)
        elif kind == 'symlink':
            self._modify_inventory(dest_path, kind, False, ie.symlink_target)
        else:
            # Report the source being copied. This used to reference an
            # undefined name and so raised NameError instead of warning.
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, src_path)
661
662
    def _get_content_from_repo(self, revision_id, file_id):
663
        """Get the content of a file for a revision-id."""
664
        revtree = self.repo.revision_tree(revision_id)
665
        return revtree.get_file_text(file_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
666
667
    def rename_handler(self, filecmd):
        """Move an inventory entry from old_path to new_path.

        Anything already at the destination is removed first. The
        entry's existing lines are fetched from the loader and recorded
        for this commit, and the entry's revision is set to this one.
        """
        source = filecmd.old_path
        destination = filecmd.new_path
        self.debug("renaming %s to %s", source, destination)
        file_id = self.bzr_file_id(source)
        new_name, target_dir = self._ensure_directory(destination)
        target_dir_id = target_dir.file_id

        # Make room at the destination
        occupant = self.inventory.path2id(destination)
        if occupant is not None:
            self.inventory.remove_recursive_id(occupant)

        # Carry the text over as a new text entry for this revision
        self.lines_for_commit[file_id] = self.loader._get_lines(
            file_id, self.inventory[file_id].revision)

        self.inventory.rename(file_id, target_dir_id, new_name)
        self.cache_mgr.rename_path(source, destination)
        self.inventory[file_id].revision = self.revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
683
684
    def deleteall_handler(self, filecmd):
        """Remove every entry directly under the inventory root, recursively."""
        self.debug("deleting all files (and also all directories)")
        # Would be nice to have an inventory.clear() method here.
        # Collect the ids up front so that the children mapping is not
        # being iterated while entries are removed.
        top_level_ids = [entry.file_id for (_name, entry) in
            self.inventory.root.children.iteritems()]
        for top_level_id in top_level_ids:
            self.inventory.remove_recursive_id(top_level_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
691
0.64.16 by Ian Clatworthy
safe processing tweaks
692
    def bzr_file_id_and_new(self, path):
        """Look up, or create, the Bazaar file identifier for a path.

        A newly generated identifier is stored in the cache manager's
        file_ids map before it is returned.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            file_id = known_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            self.debug("Generated new file id %s for '%s'", file_id, path)
            return file_id, True
        return file_id, False
706
0.64.5 by Ian Clatworthy
first cut at generic processing method
707
    def bzr_file_id(self, path):
        """Return the Bazaar file identifier for a path.

        Same lookup as bzr_file_id_and_new, without the is_new flag.
        """
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
710
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
711
    def gen_initial_inventory(self):
        """Build the starting inventory for a revision without parents."""
        fresh = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            return fresh
        # The very first root needs to have the right revision
        fresh.root.revision = self.revision_id
        return fresh
718
0.64.5 by Ian Clatworthy
first cut at generic processing method
719
    def gen_revision_id(self):
        """Generate a revision id from the committer and commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        name, email, when = self.command.committer[:3]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        return generate_ids.gen_revision_id("%s <%s>" % (name, email), when)
730
0.64.7 by Ian Clatworthy
start of multiple commit handling
731
    def get_inventory(self, revision_id):
        """Return the inventory for a revision id, using the cache first.

        On a cache miss the inventory is read from the repository and
        stored in the cache before being returned.
        """
        cached = self.cache_mgr.inventories
        try:
            return cached[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        cached[revision_id] = inv
        return inv
742
0.64.5 by Ian Clatworthy
first cut at generic processing method
743
    def _get_inventories(self, revision_ids):
744
        """Get the inventories for revision-ids.
745
        
746
        This is a callback used by the RepositoryLoader to
0.64.93 by Ian Clatworthy
minor comment clean-ups
747
        speed up inventory reconstruction.
748
        """
0.64.5 by Ian Clatworthy
first cut at generic processing method
749
        present = []
750
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
751
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
752
        # successfully loaded into the repsoitory
753
        for revision_id in revision_ids:
754
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
755
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
756
                present.append(revision_id)
757
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
758
                if self.verbose:
759
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
760
                # Not cached so reconstruct from repository
761
                if self.repo.has_revision(revision_id):
762
                    rev_tree = self.repo.revision_tree(revision_id)
763
                    present.append(revision_id)
764
                else:
765
                    rev_tree = self.repo.revision_tree(None)
766
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
767
                self.cache_mgr.inventories[revision_id] = inv
768
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
769
        return present, inventories
770
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
771
    def _get_lines(self, file_id):
772
        """Get the lines for a file-id."""
773
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
774
775
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        A new entry is built for path. For a file the text details are
        taken from data; for a symlink data is the link target. Any
        other kind raises BzrError. The lines for the entry are recorded
        in lines_for_commit.
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum([len(line) for line in text_lines])
        elif isinstance(entry, inventory.InventoryLink):
            entry.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            text_lines = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))
        self.lines_for_commit[file_id] = text_lines

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
            return
        # HACK: no API for this (del+add does more than it needs to)
        self.inventory._byid[file_id] = entry
        parent_ie.children[basename] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
804
805
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parent first, and
        added to the inventory.

        :return: basename, ie where basename is the last component of
          path and ie is the inventory entry of its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # The directory doesn't exist yet, so create it - after making
        # sure that its own parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename,
            parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir