/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
# How many inventories to cache
61
_DEFAULT_INV_CACHE_SIZE = 10
62
0.64.41 by Ian Clatworthy
update multiple working trees if requested
63
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
64
class GenericProcessor(processor.ImportProcessor):
65
    """An import processor that handles basic imports.
66
67
    Current features supported:
68
0.64.16 by Ian Clatworthy
safe processing tweaks
69
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
70
    * files and symlinks commits are supported
71
    * checkpoints automatically happen at a configurable frequency
72
      over and above the stream requested checkpoints
73
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
74
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
75
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
77
    At checkpoints and on completion, the commit-id -> revision-id map is
78
    saved to a file called 'fastimport-id-map'. If the import crashes
79
    or is interrupted, it can be started again and this file will be
80
    used to skip over already loaded revisions. The format of each line
81
    is "commit-id revision-id" so commit-ids cannot include spaces.
82
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
83
    Here are the supported parameters:
84
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
85
    * info - name of a hints file holding the analysis generated
86
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      importing large repositories, this parameter is needed so
88
      that the importer knows what blobs to intelligently cache.
89
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      By default, the importer updates the repository
92
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
94
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
102
    * inv-cache - number of inventories to cache.
103
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
104
105
    * experimental - enable experimental mode, i.e. use features
106
      not yet fully tested.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
107
    """
108
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
109
    known_params = [
110
        'info',
111
        'trees',
112
        'checkpoint',
113
        'count',
114
        'inv-cache',
115
        'experimental',
116
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
117
118
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message, optionally holding %-style placeholders
        :param args: values passed through untouched to the trace ``note``
            function
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
122
123
    def warning(self, msg, *args):
        """Emit a warning prefixed with the current time of day.

        :param msg: the message, optionally holding %-style placeholders
        :param args: values passed through untouched to the trace
            ``warning`` function
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
127
0.64.67 by James Westby
Add support for -Dfast-import.
128
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        The message is only written (via ``mutter``) when the
        "fast-import" debug flag is set; otherwise this is a no-op.

        :param msg: the message, optionally holding %-style placeholders
        :param args: values passed through untouched to ``mutter``
        """
        # Note: the parameter used to be misspelt 'mgs', which made the
        # formatting line below fail whenever the debug flag was enabled.
        if "fast-import" in debug.debug_flags:
            msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
            mutter(msg, *args)
133
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
134
    def _time_of_day(self):
135
        """Time of day as a string."""
136
        # Note: this is a separate method so tests can patch in a fixed value
137
        return time.strftime("%H:%M:%S")
0.64.67 by James Westby
Add support for -Dfast-import.
138
    
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
139
    def pre_process(self):
        """Set up caches, the revision loader and the repository write group.

        Loads the parameters and id map, creates the cache manager and the
        revision loader, disables autopacking on pack repositories, opens
        a write group and enables caching on the inventory versioned file.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        # The id map tells us how many commits an earlier run already loaded
        already_loaded = self._init_id_map()
        self.skip_total = already_loaded
        if already_loaded:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", already_loaded)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if not self._experimental:
            loader_factory = revisionloader.ImportRevisionLoader
        else:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        self.repo.get_inventory_weave().enable_cache()
179
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
180
    def _load_info_and_params(self):
        """Read the processor parameters and the optional info (hints) file.

        Sets the following attributes: ``_experimental``, ``id_map_path``,
        ``info``, ``progress_every``, ``checkpoint_every``,
        ``inventory_cache_size``, ``max_commits`` and ``total_commits``.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps this an integer even when true division
            # is in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
227
0.64.27 by Ian Clatworthy
1st cut at performance tuning
228
    def _process(self, command_iter):
        """Run the base-class processing, cleaning up the write group on error.

        :param command_iter: passed straight through to
            ``processor.ImportProcessor._process``
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Anything at all going wrong must not leave a write group open;
            # the original exception is always re-raised afterwards.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
236
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
237
    def post_process(self):
        """Finish the import: commit, update branches and trees, report.

        The open write group is committed and the id map saved, the heads
        and tags are handed to a GenericBranchUpdater, working trees are
        updated when the 'trees' parameter is set, statistics are dumped
        and finally the user is told about any follow-up steps.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound under the same name in both
                # branches; it is passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
297
298
    def _get_working_trees(self, branches):
299
        """Get the working trees for branches in the repository."""
300
        result = []
301
        wt_expected = self.repo.make_working_trees()
302
        for br in branches:
303
            if br == self.branch and br is not None:
304
                wt = self.working_tree
305
            elif wt_expected:
306
                try:
307
                    wt = br.bzrdir.open_workingtree()
308
                except errors.NoWorkingTree:
309
                    self.warning("No working tree for branch %s", br)
310
                    continue
311
            else:
312
                continue
313
            result.append(wt)
314
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
315
316
    def dump_stats(self):
        """Note how many revisions, branches and trees were handled.

        The revision figure excludes commits skipped on restart, and the
        message ends with the time elapsed since processing started.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
326
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
327
    def _init_id_map(self):
        """Load the id-map and verify its size against the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository's revision count
            differs from the number of entries in the map
        """
        # Only the sizes are compared for now. A more paranoid check could
        # also confirm that the revision-ids themselves are identical.
        id_map, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        in_repository = len(self.repo.all_revision_ids())
        if in_repository == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repository)
341
342
    def _save_id_map(self):
        """Write the commit-id -> revision-id map to the id-map file."""
        # The complete map is rewritten on each call. Appending only the
        # new entries is a possible later optimisation if this gets slow.
        revision_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, revision_ids)
347
0.64.5 by Ian Clatworthy
first cut at generic processing method
348
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        Marked blobs are stored under the command's id; unmarked blobs are
        stored under the sha of their data.
        """
        if cmd.mark is None:
            dataref = osutils.sha_strings(cmd.data)
        else:
            dataref = cmd.id
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
355
356
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the command; it is not inspected
        """
        repo = self.repo
        # Close off the current write group, persist the id map, then
        # open a fresh write group for whatever comes next.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
362
363
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart, commits are skipped: heads are
        still tracked and the file commands consumed, but nothing is
        committed. Otherwise the commit is loaded through a
        GenericCommitHandler, progress is reported, and the commit limit
        and automatic checkpoint frequency are checked.

        :param cmd: the CommitCommand to process
        :raises BadRestart: if a commit that should be skipped is not
            present in the id map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
400
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
401
    def _gen_file_ids_cache(self):
402
        """Generate the file-id cache by searching repository inventories.
403
        """
404
        # Get the interesting revisions - the heads
405
        head_ids = self.cache_mgr.heads.keys()
406
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
407
408
        # Update the fileid cache
409
        file_ids = {}
410
        for revision_id in revision_ids:
411
            inv = self.repo.revision_tree(revision_id).inventory
412
            # Cache the inventoires while we're at it
413
            self.cache_mgr.inventories[revision_id] = inv
414
            for path, ie in inv.iter_entries():
415
                file_ids[path] = ie.file_id
416
        self.cache_mgr.file_ids = file_ids
417
0.64.25 by Ian Clatworthy
slightly better progress reporting
418
    def report_progress(self, details=''):
        """Note the number of commits processed, every ``progress_every``.

        When the total number of commits is known, the message shows
        "done/total" plus an ETA if one is available.

        :param details: extra text appended to the message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is None:
            counts = "%d" % (self._revision_count,)
            eta_str = ''
        else:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
            eta = progress.get_eta(self._start_time, self._revision_count,
                self.total_commits)
            eta_str = progress.str_tdelta(eta)
            if not eta_str.endswith('--'):
                eta_str = '[%s] ' % eta_str
            else:
                eta_str = ''
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
434
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
435
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # A progress bar could be used here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
439
440
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under 'refs/tags/' defines a tag; a reset of any
        other ref is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)
            return
        tag_name = cmd.ref[len(tag_prefix):]
        self._set_tag(tag_name, cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
447
448
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name = cmd.id
        target = cmd.from_
        self._set_tag(tag_name, target)
451
452
    def _set_tag(self, name, from_):
453
        """Define a tag given a name an import 'from' reference."""
454
        bzr_tag_name = name.decode('utf-8', 'replace')
455
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
456
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
457
458
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
459
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor.

    The caches kept here are: blob data (split into "sticky" and
    single-use stores), an LRU cache of inventories, the commit-id to
    revision-id lookup table, the path to file-id map and the state
    used for head tracking.
    """

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as ``self.verbose``
        :param inventory_cache_size: the size given to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob goes into the sticky store when no hints are available,
        when the data is the empty string, or when its id is listed in
        the blobs to keep. Otherwise it goes into the single-use store.

        :param id: the dataref of the blob
        :param data: the blob content
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay available; a blob from the single-use store is
        removed as it is returned.

        :param id: the dataref of the blob
        :raises KeyError: if the blob is in neither store
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :param old_path: the path the file-id is currently recorded under
        :param new_path: the path the file-id is recorded under afterwards
        :raises KeyError: if no file-id is recorded for ``old_path``
        """
        # In this case, we need to forget the file-id we gave the old path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # The old entry is popped *before* the new one is stored so that
        # renaming a path onto itself keeps the mapping instead of
        # deleting it.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
528
529
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
530
def _track_heads(cmd, cache_mgr):
531
    """Track the repository heads given a CommitCommand.
532
    
533
    :return: the list of parents in terms of commit-ids
534
    """
535
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
536
    if cmd.from_ is not None:
537
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
538
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
539
        last_id = cache_mgr.last_ids.get(cmd.ref)
540
        if last_id is not None:
541
            parents = [last_id]
542
        else:
543
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
544
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
545
    # Track the heads
546
    for parent in parents:
547
        try:
548
            del cache_mgr.heads[parent]
549
        except KeyError:
550
            # it's ok if the parent isn't there - another
551
            # commit may have already removed it
552
            pass
553
    cache_mgr.heads[cmd.id] = cmd.ref
554
    cache_mgr.last_ids[cmd.ref] = cmd.id
555
    cache_mgr.last_ref = cmd.ref
556
    return parents
557
558
0.64.5 by Ian Clatworthy
first cut at generic processing method
559
class GenericCommitHandler(processor.CommitHandler):
560
0.64.48 by Ian Clatworthy
one revision loader instance
561
    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: passed on to ``processor.CommitHandler.__init__``
        :param repo: stored as ``self.repo``
        :param cache_mgr: stored as ``self.cache_mgr``
        :param loader: stored as ``self.loader``
        :param verbose: stored as ``self.verbose``
        :param _experimental: stored as ``self._experimental``
        """
        processor.CommitHandler.__init__(self, command)
        # Collaborators
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        # Behaviour switches
        self.verbose = verbose
        self._experimental = _experimental
0.64.5 by Ian Clatworthy
first cut at generic processing method
569
0.64.43 by Ian Clatworthy
verbose mode cleanup
570
    def note(self, msg, *args):
        """Output a note but add context.

        The id of the command being handled is appended, in parentheses,
        to the message before it is passed to the module-level ``note``.
        """
        note("%s (%s)" % (msg, self.command.id), *args)
574
575
    def warning(self, msg, *args):
        """Output a warning but add context.

        The message is prefixed with "WARNING: " and suffixed with the id
        of the command being handled, in parentheses, before it is passed
        to the module-level ``warning``.
        """
        warning("WARNING: %s (%s)" % (msg, self.command.id), *args)
579
0.64.67 by James Westby
Add support for -Dfast-import.
580
    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given.

        Nothing is output unless "fast-import" is among the debug flags.
        The id of the command being handled is appended, in parentheses,
        to the message.
        """
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)
585
0.64.5 by Ian Clatworthy
first cut at generic processing method
586
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resets the per-commit text cache,
        resolves the parents to revision-ids, seeds ``self.inventory``
        and builds the directory lookup table for that inventory.
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list (as commit-ids),
        # then map those commit-ids to revision-ids. An empty parent
        # list simply yields an empty list here.
        parent_commit_ids = _track_heads(self.command, self.cache_mgr)
        self.parents = [self.cache_mgr.revision_ids[commit_id]
            for commit_id in parent_commit_ids]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            first_parent_inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = first_parent_inv.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
619
0.64.14 by Ian Clatworthy
commit of modified files working
620
    def post_process_files(self):
        """Save the revision.

        The finished inventory is put in the inventory cache, a Revision
        is built from the command's committer, author and message, and
        both are handed to the loader.
        """
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Work out who committed and, when different, who authored
        committer = self.command.committer
        committer_id = "%s <%s>" % (committer[0],committer[1])
        properties = {}
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # Only record the author when it differs from the committer
            if author_id != committer_id:
                properties['author'] = author_id

        # Load the revision into the repository
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=committer_id,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=properties,
           parent_ids=self.parents)
        # The last two arguments are the callbacks the loader uses to
        # get the lines for a file-id and the inventories for revision-ids.
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
644
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
645
    def _escape_commit_message(self, message):
646
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
647
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
648
        # Code copied from bzrlib.commit.
649
        
650
        # Python strings can include characters that can't be
651
        # represented in well-formed XML; escape characters that
652
        # aren't listed in the XML specification
653
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
654
        message, _ = re.subn(
655
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
656
            lambda match: match.group(0).encode('unicode_escape'),
657
            message)
658
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
659
660
    def modify_handler(self, filecmd):
        """Handle a file modification.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command's inline data. The inventory
        is then updated for the command's path.
        """
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)
668
669
    def delete_handler(self, filecmd):
        """Handle a file deletion.

        The entry for the path's file-id is removed from the current
        inventory. When the inventory has no such entry,
        ``_warn_unless_in_merges`` decides whether a warning is given.
        The cache manager is then told about the deleted path.
        """
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            # Either error means the file-id isn't in this inventory
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            # Nothing cached for this path
            pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
683
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
684
    def _warn_unless_in_merges(self, fileid, path):
685
        if len(self.parents) <= 1:
686
            return
687
        for parent in self.parents[1:]:
688
            if fileid in self.get_inventory(parent):
689
                return
690
        self.warning("ignoring delete of %s as not in parent inventories", path)
691
0.64.5 by Ian Clatworthy
first cut at generic processing method
692
    def copy_handler(self, filecmd):
        """Handle a file copy - not implemented.

        :raises NotImplementedError: always, carrying this handler
        """
        handler = self.copy_handler
        raise NotImplementedError(handler)
694
695
    def rename_handler(self, filecmd):
        """Handle a file rename.

        The directory for the new path is ensured to exist, anything
        already recorded at the new path is removed from the inventory,
        the entry is renamed and the cache manager's path mapping is
        updated to match.
        """
        old_path, new_path = filecmd.old_path, filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        # Whatever currently sits at the destination is removed first
        occupant_id = self.inventory.path2id(new_path)
        if occupant_id is not None:
            self.inventory.remove_recursive_id(occupant_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)
0.64.5 by Ian Clatworthy
first cut at generic processing method
707
708
    def deleteall_handler(self, filecmd):
        """Handle a delete-all - not implemented.

        :raises NotImplementedError: always, carrying this handler
        """
        handler = self.deleteall_handler
        raise NotImplementedError(handler)
710
0.64.16 by Ian Clatworthy
safe processing tweaks
711
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A file-id already recorded for the path in the cache manager is
        reused. Otherwise one is generated, recorded and returned.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            return known_ids[path], False
        except KeyError:
            # Not seen before - fall through and generate one
            pass
        new_id = generate_ids.gen_file_id(path)
        known_ids[path] = new_id
        self.debug("Generated new file id %s for '%s'", new_id, path)
        return new_id, True
725
0.64.5 by Ian Clatworthy
first cut at generic processing method
726
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        This is ``bzr_file_id_and_new`` without the is_new flag.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
729
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
730
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision.

        :return: a new Inventory created with this commit's revision id
        """
        return inventory.Inventory(revision_id=self.revision_id)
734
0.64.5 by Ian Clatworthy
first cut at generic processing method
735
    def gen_revision_id(self):
        """Generate a revision id.

        The id is generated from the command's committer (name and
        email) and the committer timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        name, email, timestamp = committer[0], committer[1], committer[2]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, timestamp)
746
0.64.7 by Ian Clatworthy
start of multiple commit handling
747
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first. On a miss the inventory
        is taken from the repository's revision tree and cached.
        """
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = inv
        return inv
758
0.64.5 by Ian Clatworthy
first cut at generic processing method
759
    def _get_inventories(self, revision_ids):
760
        """Get the inventories for revision-ids.
761
        
762
        This is a callback used by the RepositoryLoader to
763
        speed up inventory reconstruction."""
764
        present = []
765
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
766
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
767
        # successfully loaded into the repsoitory
768
        for revision_id in revision_ids:
769
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
770
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
771
                present.append(revision_id)
772
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
773
                if self.verbose:
774
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
775
                # Not cached so reconstruct from repository
776
                if self.repo.has_revision(revision_id):
777
                    rev_tree = self.repo.revision_tree(revision_id)
778
                    present.append(revision_id)
779
                else:
780
                    rev_tree = self.repo.revision_tree(None)
781
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
782
                self.cache_mgr.inventories[revision_id] = inv
783
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
784
        return present, inventories
785
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
786
    def _get_lines(self, file_id):
787
        """Get the lines for a file-id."""
788
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
789
790
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of inventory entry to make
        :param is_executable: the executable flag, used for files
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the entry made for ``kind`` is
            neither a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum([len(line) for line in text_lines])
            # Keep the text around for the _get_lines callback
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
            parent_ie.children[basename] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
816
817
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        Missing directories are created, parent first, and added to
        both the inventory and the directory lookup table.

        :return: basename, ie where
          basename = the last component of path
          ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
846
847
0.64.34 by Ian Clatworthy
report lost branches
848
class GenericBranchUpdater(object):
    """Update Bazaar branches to match the heads seen in an import stream.

    Heads are identified by git-style reference names. When a default
    branch is supplied, one head (the "trunk") is mapped onto it. Every
    other head gets a branch of its own if the repository is shared;
    otherwise, or if creating the branch fails, it is reported as lost.
    """

    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
        """Create an object responsible for updating branches.

        :param repo: the repository the revisions were imported into
        :param branch: the branch to update with the trunk, or None
        :param cache_mgr: an object whose revision_ids mapping converts
          commit marks into revision-ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        :param last_ref: the last reference in the import stream; it is
          used as the trunk when refs/heads/master is not a known head
        :param tags: a dictionary of tag names to revision-ids; it may be
          empty or None
        """
        self.repo = repo
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref
        self.tags = tags

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        updated = []
        branch_tips, lost_heads = self._get_matching_branches()
        for br, tip in branch_tips:
            # Only count the branches that really changed
            if self._update_branch(br, tip):
                updated.append(br)
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch,tip) tuples, tip being a commit
            mark. When a default branch was given, its entry comes first.
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        branch_tips = []
        lost_heads = []
        # Take a real list: the trunk is removed from it below, which is
        # not possible on the view objects keys() may return.
        ref_names = list(self.heads_by_ref.keys())
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name
        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)
        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                except errors.BzrError as ex:
                    # Report the failure, then record the head as lost
                    error("ERROR: failed to create branch %s: %s",
                        location, ex)
                else:
                    branch_tips.append((br, tip))
                    continue
            lost_head = self.cache_mgr.revision_ids[tip]
            lost_heads.append((name, lost_head))
        return branch_tips, lost_heads

    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        :return: refs/heads/master if it is one of ref_names, otherwise
          the last reference in the import stream
        """
        for candidate in ['refs/heads/master']:
            if candidate in ref_names:
                return candidate
        # Use the last reference in the import stream
        return self.last_ref

    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch is, or is to be created
        :return: the existing branch at location, or a newly created one
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            return bzrdir.BzrDir.create_branch_convenience(location)

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Generate Bazaar branch names from import ref names.

        The last component of a reference is used as the name. If that
        name is already taken, the components after any leading 'refs'
        are joined with '--' and used instead.

        :return: a dictionary with Bazaar names as keys and
          the original reference names as values.
        """
        bazaar_names = {}
        # Sorted so that the choice of which ref keeps the short name
        # does not depend on the order the refs were supplied in
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            full_name = "--".join(parts)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                bazaar_name = full_name
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the revision to make the tip
        :return: whether the branch was changed or not
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(revs)
        existing_revno, existing_last_rev_id = br.last_revision_info()
        changed = False
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
            br.set_last_revision_info(revno, last_rev_id)
            changed = True
        # apply tags known in this branch
        my_tags = {}
        if self.tags:
            # Build the set once so each tag costs a single hash lookup
            # instead of a scan over the whole history
            revs_in_branch = set(revs)
            for tag, rev in self.tags.items():
                if rev in revs_in_branch:
                    my_tags[tag] = rev
            if my_tags:
                br.tags._set_tag_dict(my_tags)
                changed = True
        if changed:
            tagno = len(my_tags)
            note("\t branch %s now has %d %s and %d %s", br.nick,
                revno, helpers.single_plural(revno, "revision", "revisions"),
                tagno, helpers.single_plural(tagno, "tag", "tags"))
        return changed