/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
# How many inventories to cache
61
_DEFAULT_INV_CACHE_SIZE = 10
62
0.64.41 by Ian Clatworthy
update multiple working trees if requested
63
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
64
class GenericProcessor(processor.ImportProcessor):
65
    """An import processor that handles basic imports.
66
67
    Current features supported:
68
0.64.16 by Ian Clatworthy
safe processing tweaks
69
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
70
    * files and symlinks commits are supported
71
    * checkpoints automatically happen at a configurable frequency
72
      over and above the stream requested checkpoints
73
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
74
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
75
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
77
    At checkpoints and on completion, the commit-id -> revision-id map is
78
    saved to a file called 'fastimport-id-map'. If the import crashes
79
    or is interrupted, it can be started again and this file will be
80
    used to skip over already loaded revisions. The format of each line
81
    is "commit-id revision-id" so commit-ids cannot include spaces.
82
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
83
    Here are the supported parameters:
84
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
85
    * info - name of a hints file holding the analysis generated
86
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      importing large repositories, this parameter is needed so
88
      that the importer knows what blobs to intelligently cache.
89
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      By default, the importer updates the repository
92
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
94
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
102
    * inv-cache - number of inventories to cache.
103
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
104
105
    * experimental - enable experimental mode, i.e. use features
106
      not yet fully tested.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
107
    """
108
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
109
    known_params = [
110
        'info',
111
        'trees',
112
        'checkpoint',
113
        'count',
114
        'inv-cache',
115
        'experimental',
116
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
117
118
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message text, optionally a %-style format string
        :param args: values handed on unchanged to the trace ``note`` call
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
122
123
    def warning(self, msg, *args):
        """Emit a warning prefixed with the time of day and 'WARNING:'.

        :param msg: the message text, optionally a %-style format string
        :param args: values handed on unchanged to the trace ``warning`` call
        """
        timestamp = self._time_of_day()
        warning("%s WARNING: %s" % (timestamp, msg), *args)
127
0.64.67 by James Westby
Add support for -Dfast-import.
128
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        Nothing is written unless "fast-import" is present in
        ``debug.debug_flags``.

        :param msg: the message text, optionally a %-style format string
        :param args: values handed on unchanged to ``mutter``
        """
        # The first parameter was previously misspelt 'mgs', so building the
        # message below raised NameError as soon as the flag was enabled.
        if "fast-import" not in debug.debug_flags:
            return
        msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(msg, *args)
133
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
134
    def _time_of_day(self):
135
        """Time of day as a string."""
136
        # Note: this is a separate method so tests can patch in a fixed value
137
        return time.strftime("%H:%M:%S")
0.64.67 by James Westby
Add support for -Dfast-import.
138
    
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
139
    def pre_process(self):
        """Set up state before any commands are processed.

        Records the start time, loads the parameters, creates the cache
        manager, loads the id-map, picks a revision loader, disables
        autopacking on pack repositories and opens the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        # Find out how many commits the id-map says are already loaded
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if not self._experimental:
            loader_factory = revisionloader.ImportRevisionLoader
        else:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if not isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = None
        else:
            pack_collection = self.repo._pack_collection
            # Remember the original hook; post_process tests this attribute
            self._original_max_pack_count = pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
175
176
    def _load_info_and_params(self):
        """Derive the import settings from ``self.params``.

        Sets ``_experimental``, ``id_map_path``, ``info``, ``progress_every``,
        ``checkpoint_every``, ``inventory_cache_size``, ``max_commits`` and
        ``total_commits`` on this processor.
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        control_transport = self.repo.control_files._transport
        self.id_map_path = control_transport.local_abspath(
            "fastimport-id-map")

        # Load the info file, if any
        info_path = params.get('info')
        if info_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(info_path)

        # Decide how often to automatically report progress
        # (not a parameter yet); verbose mode reports ten times as often
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every / 10

        # Decide how often to automatically checkpoint
        checkpoint_param = params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT)
        self.checkpoint_every = int(checkpoint_param)

        # Decide how big to make the inventory cache
        inv_cache_param = params.get('inv-cache', _DEFAULT_INV_CACHE_SIZE)
        self.inventory_cache_size = int(inv_cache_param)

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            requested = int(params['count'])
        except KeyError:
            requested = None
        else:
            # A negative count means "no limit"
            if requested < 0:
                requested = None
        self.max_commits = requested

        total = self.max_commits
        if self.info is not None:
            total = int(self.info['Command counts']['commit'])
            if self.max_commits is not None and total > self.max_commits:
                total = self.max_commits
        self.total_commits = total
0.64.25 by Ian Clatworthy
slightly better progress reporting
223
0.64.27 by Ian Clatworthy
1st cut at performance tuning
224
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed unchanged to
            ``processor.ImportProcessor._process``
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, don't leave a write group open;
            # the original exception is then re-raised untouched.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
232
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
233
    def post_process(self):
        """Finish the import once all commands have been processed.

        Commits the open write group, saves the id-map, updates the
        branches, optionally updates the working trees (the 'trees'
        parameter), dumps statistics and tells the user what to do next.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The verbose branch used to bind 'report', leaving
                # 'reporter' undefined and raising NameError in the loop.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
293
294
    def _get_working_trees(self, branches):
        """Collect the working trees belonging to the given branches.

        :param branches: the branches to find working trees for
        :return: a list of working trees; branches without one are left out
        """
        trees_expected = self.repo.make_working_trees()
        found = []
        for br in branches:
            # The processor's own branch uses the tree it already holds
            if br == self.branch and br is not None:
                found.append(self.working_tree)
                continue
            if not trees_expected:
                continue
            try:
                tree = br.bzrdir.open_workingtree()
            except errors.NoWorkingTree:
                self.warning("No working tree for branch %s", br)
                continue
            found.append(tree)
        return found
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
311
312
    def dump_stats(self):
        """Note the revision, branch and tree counts and the time taken."""
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        # Commits skipped on a restart are not counted as imported
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(revisions,
            "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
322
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
323
    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds a different
            number of revisions than the id-map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        id_map, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        in_repository = len(self.repo.all_revision_ids())
        if in_repository == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repository)
337
338
    def _save_id_map(self):
        """Pass the whole commit-id -> revision-id map to the id-map saver."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        destination = self.id_map_path
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(destination, id_map)
343
0.64.5 by Ian Clatworthy
first cut at generic processing method
344
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache."""
        # Marked blobs are keyed by their id; unmarked blobs are keyed by
        # the hash of their data instead.
        if cmd.mark is None:
            dataref = osutils.sha_strings(cmd.data)
        else:
            dataref = cmd.id
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
351
352
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: not used here; automatic checkpoints pass None
        """
        repo = self.repo
        # Commit the current write group and start a new one,
        # saving the id-map in between
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
358
359
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart (fewer commits seen than
        ``skip_total``) the commit is only tracked, not loaded. Otherwise
        it is committed through a GenericCommitHandler, progress is
        reported, and the import either stops (``max_commits`` reached) or
        checkpoints automatically every ``checkpoint_every`` commits.

        :raises BadRestart: if a commit being skipped is missing from the
            id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id.
            # ('in' replaces the deprecated dict.has_key, which no longer
            # exists in Python 3.)
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids.keys()))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
396
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
397
    def _gen_file_ids_cache(self):
        """Rebuild the path -> file-id cache from the heads' inventories."""
        cache_mgr = self.cache_mgr

        # Get the interesting revisions - the heads
        head_revision_ids = [cache_mgr.revision_ids[head]
            for head in cache_mgr.heads.keys()]

        # Update the fileid cache
        path_to_file_id = {}
        for revision_id in head_revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            cache_mgr.inventories[revision_id] = inv
            for path, entry in inv.iter_entries():
                path_to_file_id[path] = entry.file_id
        cache_mgr.file_ids = path_to_file_id
413
0.64.25 by Ian Clatworthy
slightly better progress reporting
414
    def report_progress(self, details=''):
        """Note the commit count every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every != 0:
            return
        if self.total_commits is None:
            # Without a known total there is nothing to base an ETA on
            counts = "%d" % (self._revision_count,)
            eta_str = ''
        else:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
            eta = progress.get_eta(self._start_time, self._revision_count,
                self.total_commits)
            eta_str = progress.str_tdelta(eta)
            if eta_str.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
430
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
431
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
435
436
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under 'refs/tags/' defines a tag; any other
        reset is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)
            return
        self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
443
444
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name, target = cmd.id, cmd.from_
        self._set_tag(tag_name, target)
447
448
    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name; decoded as UTF-8 with replacement of
            undecodable bytes
        :param from_: the import reference looked up in the commit-id to
            revision-id map
        """
        tag_name = name.decode('utf-8', 'replace')
        revision_id = self.cache_mgr.revision_ids[from_]
        self.tags[tag_name] = revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
453
454
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
455
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as given
        :param inventory_cache_size: the size handed to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no usage hints are available, when
        it is empty, or when the hints list its id; otherwise it is
        kept only until it is first fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed from the
        cache as it is returned.

        :raises KeyError: if no blob is stored under id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :raises KeyError: if old_path has no file-id recorded
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Remove the old entry before adding the new one so that renaming
        # a path onto itself keeps its file-id instead of losing it.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
524
525
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
526
def _track_heads(cmd, cache_mgr):
    """Update the head-tracking caches for a CommitCommand.

    The parents are the commit's from clause (or, when it has none, the
    last commit seen on the same ref, if any) followed by its merges.
    Every parent stops being a head and the commit becomes the head of
    its ref.

    :return: the list of parents in terms of commit-ids
    """
    # Work out the true set of parents
    if cmd.from_ is None:
        parents = []
        previous = cache_mgr.last_ids.get(cmd.ref)
        if previous is not None:
            parents.append(previous)
    else:
        parents = [cmd.from_]
    parents.extend(cmd.merges)

    # A parent is no longer a head. It may already be gone if an
    # earlier commit built on it as well.
    heads = cache_mgr.heads
    for commit_id in parents:
        heads.pop(commit_id, None)

    # Record this commit as the newest one on its ref
    heads[cmd.id] = cmd.ref
    cache_mgr.last_ids[cmd.ref] = cmd.id
    cache_mgr.last_ref = cmd.ref
    return parents
553
554
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
class GenericCommitHandler(processor.CommitHandler):
    """Turn one fast-import commit command into a Bazaar revision.

    pre_process_files() seeds an inventory from the first parent, the
    file command handlers then change that inventory, and
    post_process_files() hands the revision and inventory to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for a single commit command.

        :param command: the CommitCommand being processed
        :param repo: the repository; used to check for rich root support
            and to rebuild inventories that are not cached
        :param cache_mgr: the cache manager holding the revision-id,
            file-id, inventory and blob caches
        :param loader: the object whose load() method is given the
            finished revision
        :param verbose: if True, inventory cache misses are reported
        :param _experimental: stored on the handler; not otherwise used
            by this class
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Map the parents from import commit-ids to revision-ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It is unfortunate that this is needed at this level (but it is).
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a path using inline data or a stored blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory being built."""
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a delete of a file missing from all merge parents.

        Nothing is reported when the commit has at most one parent, or
        when the file-id is found in the inventory of any parent after
        the first.
        """
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Copies are not implemented."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Move a path, replacing anything already at the new path."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Deleting everything is not implemented."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            file_id = self.cache_mgr.file_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            self.debug("Generated new file id %s for '%s'", file_id, path)
            return file_id, True
        return file_id, False

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids found in the cache or repository
          inventories = one inventory per requested revision-id
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree the
                    # repository gives for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the entry made for kind is neither
            a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
842
843
0.64.34 by Ian Clatworthy
report lost branches
844
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
845
0.64.64 by Ian Clatworthy
save tags known about in each branch
846
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
847
        """Create an object responsible for updating branches.
848
849
        :param heads_by_ref: a dictionary where
850
          names are git-style references like refs/heads/master;
851
          values are one item lists of commits marks.
852
        """
0.64.37 by Ian Clatworthy
create branches as required
853
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
854
        self.branch = branch
855
        self.cache_mgr = cache_mgr
856
        self.heads_by_ref = heads_by_ref
857
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
858
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
859
860
    def update(self):
861
        """Update the Bazaar branches and tips matching the heads.
862
863
        If the repository is shared, this routine creates branches
864
        as required. If it isn't, warnings are produced about the
865
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
866
0.64.34 by Ian Clatworthy
report lost branches
867
        :return: updated, lost_heads where
868
          updated = the list of branches updated
869
          lost_heads = a list of (bazaar-name,revision) for branches that
870
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
871
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
872
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
873
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
874
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
875
            if self._update_branch(br, tip):
876
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
877
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
878
879
    def _get_matching_branches(self):
880
        """Get the Bazaar branches.
881
0.64.34 by Ian Clatworthy
report lost branches
882
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
883
          default_tip = the last commit mark for the default branch
884
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
885
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
886
            would have been created had the repository been shared and
887
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
888
        """
0.64.37 by Ian Clatworthy
create branches as required
889
        branch_tips = []
890
        lost_heads = []
891
        ref_names = self.heads_by_ref.keys()
892
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
893
            trunk = self.select_trunk(ref_names)
894
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
895
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
896
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
897
898
        # Convert the reference names into Bazaar speak
899
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
900
0.64.37 by Ian Clatworthy
create branches as required
901
        # Policy for locating branches
902
        def dir_under_current(name, ref_name):
903
            # Using the Bazaar name, get a directory under the current one
904
            return name
905
        def dir_sister_branch(name, ref_name):
906
            # Using the Bazaar name, get a sister directory to the branch
907
            return osutils.pathjoin(self.branch.base, "..", name)
908
        if self.branch is not None:
909
            dir_policy = dir_sister_branch
910
        else:
911
            dir_policy = dir_under_current
912
0.64.34 by Ian Clatworthy
report lost branches
913
        # Create/track missing branches
914
        shared_repo = self.repo.is_shared()
915
        for name in sorted(bzr_names.keys()):
916
            ref_name = bzr_names[name]
917
            tip = self.heads_by_ref[ref_name][0]
918
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
919
                location = dir_policy(name, ref_name)
920
                try:
921
                    br = self.make_branch(location)
922
                    branch_tips.append((br,tip))
923
                    continue
924
                except errors.BzrError, ex:
925
                    error("ERROR: failed to create branch %s: %s",
926
                        location, ex)
927
            lost_head = self.cache_mgr.revision_ids[tip]
928
            lost_info = (name, lost_head)
929
            lost_heads.append(lost_info)
930
        return branch_tips, lost_heads
931
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
932
    def select_trunk(self, ref_names):
933
        """Given a set of ref names, choose one as the trunk."""
934
        for candidate in ['refs/heads/master']:
935
            if candidate in ref_names:
936
                return candidate
937
        # Use the last reference in the import stream
938
        return self.last_ref
939
0.64.37 by Ian Clatworthy
create branches as required
940
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch lives or should be created
        :return: the branch opened at location, or a newly created one
          if opening raised NotBranchError
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # No branch could be opened there, so create one
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
946
947
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
948
        """Generate Bazaar branch names from import ref names.
949
        
950
        :return: a dictionary with Bazaar names as keys and
951
          the original reference names as values.
952
        """
0.64.34 by Ian Clatworthy
report lost branches
953
        bazaar_names = {}
954
        for ref_name in sorted(ref_names):
955
            parts = ref_name.split('/')
956
            if parts[0] == 'refs':
957
                parts.pop(0)
958
            full_name = "--".join(parts)
959
            bazaar_name = parts[-1]
960
            if bazaar_name in bazaar_names:
961
                bazaar_name = full_name
962
            bazaar_names[bazaar_name] = ref_name
963
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
964
965
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
966
        """Update a branch with last revision and tag information.
967
        
968
        :return: whether the branch was changed or not
969
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
970
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
971
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
972
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
973
        existing_revno, existing_last_rev_id = br.last_revision_info()
974
        changed = False
975
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
976
            br.set_last_revision_info(revno, last_rev_id)
977
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
978
        # apply tags known in this branch
979
        my_tags = {}
980
        if self.tags:
981
            for tag,rev in self.tags.items():
982
                if rev in revs:
983
                    my_tags[tag] = rev
984
            if my_tags:
985
                br.tags._set_tag_dict(my_tags)
986
                changed = True
987
        if changed:
988
            tagno = len(my_tags)
989
            note("\t branch %s now has %d %s and %d %s", br.nick,
990
                revno, helpers.single_plural(revno, "revision", "revisions"),
991
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
992
        return changed