# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Import processor that supports all Bazaar repository formats."""

import re
import sys
import time

from bzrlib import (
    builtins,
    bzrdir,
    debug,
    delta,
    errors,
    generate_ids,
    inventory,
    lru_cache,
    osutils,
    progress,
    revision,
    revisiontree,
    transport,
    )
from bzrlib.repofmt import pack_repo
from bzrlib.trace import (
    error,
    mutter,
    note,
    warning,
    )
import bzrlib.util.configobj.configobj as configobj
from bzrlib.plugins.fastimport import (
    errors as plugin_errors,
    helpers,
    idmapfile,
    processor,
    revisionloader,
    )


# How many commits before automatically reporting progress
_DEFAULT_AUTO_PROGRESS = 1000

# How many commits before automatically checkpointing
_DEFAULT_AUTO_CHECKPOINT = 10000

# How many inventories to cache
_DEFAULT_INV_CACHE_SIZE = 10


class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * commits of files and symlinks are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.
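
    For illustration only (the commit-id and revision-id below are invented),
    a line in that file might look like:

      :100 joe@example.com-20080607041259-0123456789abcdef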

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'experimental',
        'import-marks',
        'export-marks',
        ]

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
            mutter(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")
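
    # For reference, a marks file as written by export_marks() below (and
    # read back by _import_marks) looks roughly like this; the mark and
    # revision-id values are invented for illustration:
    #
    #   format=1
    #   <branch info line of '\0'-separated name.mark entries, ignored on import>
    #   :1 joe@example.com-20080101000000-0123456789abcdef
    #   :2 joe@example.com-20080102000000-fedcba9876543210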

    def _import_marks(self, filename):
        try:
            f = file(filename)
        except IOError:
            self.warning("Could not open import-marks file, not importing marks")
            return

        firstline = f.readline()
        match = re.match(r'^format=(\d+)$', firstline)

        if not match:
            print >>sys.stderr, "%r doesn't look like a mark file" % (filename,)
            sys.exit(1)
        elif match.group(1) != '1':
            print >>sys.stderr, 'format version in mark file not supported'
            sys.exit(1)

        for string in f.readline().rstrip('\n').split('\0'):
            if not string:
                continue
            name, integer = string.rsplit('.', 1)
            # We really can't do anything with the branch information, so we
            # just skip it

        self.cache_mgr.revision_ids = {}
        for line in f:
            line = line.rstrip('\n')
            mark, revid = line.split(' ', 1)

            self.cache_mgr.revision_ids[mark] = revid

    def export_marks(self, filename):
        f = file(filename, 'w')
        f.write('format=1\n')

        f.write('\0tmp.0\n')

        for mark, revid in self.cache_mgr.revision_ids.iteritems():
            f.write('%s %s\n' % (mark, revid))

        f.close()

    def pre_process(self):
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        if self.params.get("import-marks"):
            self._import_marks(self.params.get("import-marks"))
            self.skip_total = False
            self._revision_count = 0
            self.first_incremental_commit = True
        else:
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()

            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
            self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if self._experimental:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        else:
            loader_factory = revisionloader.ImportRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every / 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks"):
            self.export_marks(self.params.get("export-marks"))

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository."""
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count != known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if not self.cache_mgr.revision_ids.has_key(cmd.id):
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids.keys()))
            return
        if self.first_incremental_commit:
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self, revs=False):
        """Generate the file-id cache by searching repository inventories."""
        # Get the interesting revisions - the heads
        if revs:
            head_ids = revs
        else:
            head_ids = self.cache_mgr.heads.keys()
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id


class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass
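
    # For illustration (mark values invented): with a hints file that lists
    # blob ':7' under 'Blob usage tracking'/'multi', store_blob(':7', data)
    # keeps the data in _sticky_blobs so repeated fetch_blob(':7') calls
    # return it; an unlisted blob goes into _blobs and is popped from that
    # cache on its first fetch.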

    def store_blob(self, id, data):
        """Store a blob of data."""
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data."""
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        self.file_ids[new_path] = self.file_ids[old_path]
        del self.file_ids[old_path]


def _track_heads(cmd, cache_mgr):
    """Track the repository heads given a CommitCommand.

    :return: the list of parents in terms of commit-ids
    """
    # Get the true set of parents
support merges when from clause implicit
602
    if cmd.from_ is not None:
603
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
604
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
605
        last_id = cache_mgr.last_ids.get(cmd.ref)
606
        if last_id is not None:
607
            parents = [last_id]
608
        else:
609
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
610
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
611
    # Track the heads
612
    for parent in parents:
613
        try:
614
            del cache_mgr.heads[parent]
615
        except KeyError:
616
            # it's ok if the parent isn't there - another
617
            # commit may have already removed it
618
            pass
619
    cache_mgr.heads[cmd.id] = cmd.ref
620
    cache_mgr.last_ids[cmd.ref] = cmd.id
621
    cache_mgr.last_ref = cmd.ref
622
    return parents


class GenericCommitHandler(processor.CommitHandler):

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Get the parent inventories
        if parents:
            self.parents = [self.cache_mgr.revision_ids[p]
                for p in parents]
        else:
            self.parents = []
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if len(self.parents) == 0:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
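        # For example (input invented): a message containing a backspace,
        # u"fix\x08bug", comes back as u"fix\\x08bug".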
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except KeyError:
            self._warn_unless_in_merges(fileid, path)
        except errors.NoSuchId:
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            id = self.cache_mgr.file_ids[path]
            return id, False
        except KeyError:
            id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = id
            self.debug("Generated new file id %s for '%s'", id, path)
            return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids, say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction."""
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
855
856
    def _modify_inventory(self, path, kind, is_executable, data):
857
        """Add to or change an item in the inventory."""
858
        # Create the new InventoryEntry
859
        basename, parent_ie = self._ensure_directory(path)
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
860
        file_id = self.bzr_file_id(path)
0.64.16 by Ian Clatworthy
safe processing tweaks
861
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
862
        ie.revision = self.revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
863
        if isinstance(ie, inventory.InventoryFile):
864
            ie.executable = is_executable
0.64.13 by Ian Clatworthy
commit of new files working
865
            lines = osutils.split_lines(data)
866
            ie.text_sha1 = osutils.sha_strings(lines)
867
            ie.text_size = sum(map(len, lines))
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
868
            self.lines_for_commit[file_id] = lines
0.64.73 by James Westby
Correct typo: InventoryLnk -> InventoryLink
869
        elif isinstance(ie, inventory.InventoryLink):
0.64.74 by Ian Clatworthy
fix symlink importing
870
            ie.symlink_target = data.encode('utf8')
871
            # There are no lines stored for a symlink so
872
            # make sure the cache used by _get_lines knows that
873
            self.lines_for_commit[file_id] = []
0.64.5 by Ian Clatworthy
first cut at generic processing method
874
        else:
875
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
876
                (kind,))
877
0.64.16 by Ian Clatworthy
safe processing tweaks
878
        # Record this new inventory entry
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
879
        if file_id in self.inventory:
0.64.21 by Ian Clatworthy
fix one inventory lookup bug
880
            # HACK: no API for this (del+add does more than it needs to)
881
            self.inventory._byid[file_id] = ie
0.64.61 by Ian Clatworthy
fix missing revisions bug
882
            parent_ie.children[basename] = ie
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
883
        else:
884
            self.inventory.add(ie)
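    # Illustrative sketch (added for clarity, not part of the original
    # source): a fast-import filemodify for a regular file would reach
    # this method roughly as
    #
    #   handler._modify_inventory('doc/README.txt', 'file', False,
    #                             'hello world\n')
    #
    # while a symlink passes its target string as `data`.  Any other kind
    # raises the BzrError above.  (`handler` and the sample path are
    # illustrative names only.)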
0.64.5 by Ian Clatworthy
first cut at generic processing method
885
886
    def _ensure_directory(self, path):
887
        """Ensure that the containing directory exists for 'path'"""
888
        dirname, basename = osutils.split(path)
889
        if dirname == '':
890
            # the root node doesn't get updated
0.64.16 by Ian Clatworthy
safe processing tweaks
891
            return basename, self.inventory.root
0.64.5 by Ian Clatworthy
first cut at generic processing method
892
        try:
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
893
            ie = self.directory_entries[dirname]
0.64.5 by Ian Clatworthy
first cut at generic processing method
894
        except KeyError:
895
            # We will create this entry, since it doesn't exist
896
            pass
897
        else:
898
            return basename, ie
899
900
        # No directory existed, we will just create one, first, make sure
901
        # the parent exists
902
        dir_basename, parent_ie = self._ensure_directory(dirname)
903
        dir_file_id = self.bzr_file_id(dirname)
904
        ie = inventory.entry_factory['directory'](dir_file_id,
905
                                                  dir_basename,
906
                                                  parent_ie.file_id)
907
        ie.revision = self.revision_id
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
908
        self.directory_entries[dirname] = ie
0.64.16 by Ian Clatworthy
safe processing tweaks
909
        # There are no lines stored for a directory so
910
            # make sure the cache used by _get_lines knows that
911
        self.lines_for_commit[dir_file_id] = []
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
912
        #print "adding dir for %s" % path
0.64.16 by Ian Clatworthy
safe processing tweaks
913
        self.inventory.add(ie)
0.64.5 by Ian Clatworthy
first cut at generic processing method
914
        return basename, ie
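    # Illustrative sketch (added for clarity, not part of the original
    # source): for a hypothetical path 'lib/util/helpers.py' the recursion
    # above creates inventory entries for 'lib' and 'lib/util' (once each,
    # thanks to directory_entries) and hands back the basename together
    # with its immediate parent entry:
    #
    #   basename, parent_ie = handler._ensure_directory('lib/util/helpers.py')
    #   # basename == 'helpers.py'; parent_ie is the entry for 'lib/util'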
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
915
916
0.64.34 by Ian Clatworthy
report lost branches
917
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
918
0.64.64 by Ian Clatworthy
save tags known about in each branch
919
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
920
        """Create an object responsible for updating branches.
921
922
        :param heads_by_ref: a dictionary where
923
          keys are git-style references like refs/heads/master;
924
          values are one-item lists of commit marks.
925
        """
0.64.37 by Ian Clatworthy
create branches as required
926
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
927
        self.branch = branch
928
        self.cache_mgr = cache_mgr
929
        self.heads_by_ref = heads_by_ref
930
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
931
        self.tags = tags
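    # Illustrative sketch (added for clarity, not part of the original
    # source): heads_by_ref typically looks like
    #
    #   {'refs/heads/master':  [':100'],
    #    'refs/heads/feature': [':97']}
    #
    # i.e. one commit mark per reference, while `tags` (if given) maps tag
    # names to the revision-ids they should point at.  The mark values
    # shown are made up for illustration.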
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
932
933
    def update(self):
934
        """Update the Bazaar branches and tips matching the heads.
935
936
        If the repository is shared, this routine creates branches
937
        as required. If it isn't, warnings are produced about the
938
        loss of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
939
0.64.34 by Ian Clatworthy
report lost branches
940
        :return: updated, lost_heads where
941
          updated = the list of branches updated
942
          lost_heads = a list of (bazaar-name,revision) for branches that
943
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
944
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
945
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
946
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
947
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
948
            if self._update_branch(br, tip):
949
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
950
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
951
952
    def _get_matching_branches(self):
953
        """Get the Bazaar branches.
954
0.64.34 by Ian Clatworthy
report lost branches
955
        :return: branch_tips, lost_heads where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
956
          branch_tips = a list of (branch,tip) tuples, with the default
957
            branch (if any) appearing first.
0.64.34 by Ian Clatworthy
report lost branches
958
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
959
            would have been created had the repository been shared and
960
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
961
        """
0.64.37 by Ian Clatworthy
create branches as required
962
        branch_tips = []
963
        lost_heads = []
964
        ref_names = self.heads_by_ref.keys()
965
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
966
            trunk = self.select_trunk(ref_names)
967
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
968
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
969
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
970
971
        # Convert the reference names into Bazaar speak
972
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
973
0.64.37 by Ian Clatworthy
create branches as required
974
        # Policy for locating branches
975
        def dir_under_current(name, ref_name):
976
            # Using the Bazaar name, get a directory under the current one
977
            return name
978
        def dir_sister_branch(name, ref_name):
979
            # Using the Bazaar name, get a sister directory to the branch
980
            return osutils.pathjoin(self.branch.base, "..", name)
981
        if self.branch is not None:
982
            dir_policy = dir_sister_branch
983
        else:
984
            dir_policy = dir_under_current
985
0.64.34 by Ian Clatworthy
report lost branches
986
        # Create/track missing branches
987
        shared_repo = self.repo.is_shared()
988
        for name in sorted(bzr_names.keys()):
989
            ref_name = bzr_names[name]
990
            tip = self.heads_by_ref[ref_name][0]
991
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
992
                location = dir_policy(name, ref_name)
993
                try:
994
                    br = self.make_branch(location)
995
                    branch_tips.append((br,tip))
996
                    continue
997
                except errors.BzrError, ex:
998
                    error("ERROR: failed to create branch %s: %s",
999
                        location, ex)
1000
            lost_head = self.cache_mgr.revision_ids[tip]
1001
            lost_info = (name, lost_head)
1002
            lost_heads.append(lost_info)
1003
        return branch_tips, lost_heads
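    # Illustrative sketch (added for clarity, not part of the original
    # source): with an existing branch at, say, /repo/trunk and a ref
    # refs/heads/feature in a shared repository, the sister-branch policy
    # above would create the new branch at roughly
    #
    #   /repo/trunk/../feature    (i.e. a sibling of the existing branch)
    #
    # whereas with no starting branch it would be created in ./feature
    # under the current directory.  When the repository is not shared, the
    # head is recorded in lost_heads instead.  (Paths are made up for
    # illustration.)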
1004
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1005
    def select_trunk(self, ref_names):
1006
        """Given a set of ref names, choose one as the trunk."""
1007
        for candidate in ['refs/heads/master']:
1008
            if candidate in ref_names:
1009
                return candidate
1010
        # Use the last reference in the import stream
1011
        return self.last_ref
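    # Illustrative sketch (added for clarity, not part of the original
    # source):
    #
    #   select_trunk(['refs/heads/dev', 'refs/heads/master'])
    #       => 'refs/heads/master'
    #   select_trunk(['refs/heads/dev', 'refs/heads/next'])
    #       => self.last_ref  (the last reference seen in the stream)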
1012
0.64.37 by Ian Clatworthy
create branches as required
1013
    def make_branch(self, location):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1014
        """Make a branch in the repository if not already there."""
1015
        try:
1016
            return bzrdir.BzrDir.open(location).open_branch()
1017
        except errors.NotBranchError, ex:
1018
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
1019
1020
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
1021
        """Generate Bazaar branch names from import ref names.
1022
        
1023
        :return: a dictionary with Bazaar names as keys and
1024
          the original reference names as values.
1025
        """
0.64.34 by Ian Clatworthy
report lost branches
1026
        bazaar_names = {}
1027
        for ref_name in sorted(ref_names):
1028
            parts = ref_name.split('/')
1029
            if parts[0] == 'refs':
1030
                parts.pop(0)
1031
            full_name = "--".join(parts)
1032
            bazaar_name = parts[-1]
1033
            if bazaar_name in bazaar_names:
1034
                bazaar_name = full_name
1035
            bazaar_names[bazaar_name] = ref_name
1036
        return bazaar_names
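    # Illustrative sketch (added for clarity, not part of the original
    # source): given the (made up) refs
    #
    #   ['refs/heads/foo', 'refs/remotes/origin/foo']
    #
    # the leading 'refs' is dropped, the short last component is preferred,
    # and the full '--'-joined name is used on a clash:
    #
    #   {'foo': 'refs/heads/foo',
    #    'remotes--origin--foo': 'refs/remotes/origin/foo'}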
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1037
1038
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1039
        """Update a branch with last revision and tag information.
1040
        
1041
        :return: whether the branch was changed or not
1042
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1043
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
1044
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1045
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1046
        existing_revno, existing_last_rev_id = br.last_revision_info()
1047
        changed = False
1048
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
1049
            br.set_last_revision_info(revno, last_rev_id)
1050
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
1051
        # apply tags known in this branch
1052
        my_tags = {}
1053
        if self.tags:
1054
            for tag,rev in self.tags.items():
1055
                if rev in revs:
1056
                    my_tags[tag] = rev
1057
            if my_tags:
1058
                br.tags._set_tag_dict(my_tags)
1059
                changed = True
1060
        if changed:
1061
            tagno = len(my_tags)
1062
            note("\t branch %s now has %d %s and %d %s", br.nick,
1063
                revno, helpers.single_plural(revno, "revision", "revisions"),
1064
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1065
        return changed
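    # Illustrative sketch (added for clarity, not part of the original
    # source): the revno is simply the length of the history walked back
    # from the imported tip, e.g.
    #
    #   revs  = list(repo.iter_reverse_revision_history(last_rev_id))
    #   revno = len(revs)   # say, 120
    #
    # so the branch tip is reset to (120, last_rev_id) only when that
    # differs from branch.last_revision_info(), and only tags whose
    # revision-ids appear in `revs` are copied onto the branch.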