/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
# How many inventories to cache
61
_DEFAULT_INV_CACHE_SIZE = 10
62
0.64.41 by Ian Clatworthy
update multiple working trees if requested
63
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
64
class GenericProcessor(processor.ImportProcessor):
65
    """An import processor that handles basic imports.
66
67
    Current features supported:
68
0.64.16 by Ian Clatworthy
safe processing tweaks
69
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
70
    * files and symlinks commits are supported
71
    * checkpoints automatically happen at a configurable frequency
72
      over and above the stream requested checkpoints
73
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
74
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
75
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
77
    At checkpoints and on completion, the commit-id -> revision-id map is
78
    saved to a file called 'fastimport-id-map'. If the import crashes
79
    or is interrupted, it can be started again and this file will be
80
    used to skip over already loaded revisions. The format of each line
81
    is "commit-id revision-id" so commit-ids cannot include spaces.
82
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
83
    Here are the supported parameters:
84
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
85
    * info - name of a hints file holding the analysis generated
86
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      importing large repositories, this parameter is needed so
88
      that the importer knows what blobs to intelligently cache.
89
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      By default, the importer updates the repository
92
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
94
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
102
    * inv-cache - number of inventories to cache.
103
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
104
105
    * experimental - enable experimental mode, i.e. use features
106
      not yet fully tested.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
107
    """
108
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
109
    known_params = [
110
        'info',
111
        'trees',
112
        'checkpoint',
113
        'count',
114
        'inv-cache',
115
        'experimental',
116
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
117
118
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message text, which may hold %-style placeholders
        :param args: values handed on unchanged to the underlying note() call
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
122
123
    def warning(self, msg, *args):
        """Emit a warning prefixed with the current time of day.

        :param msg: the message text, which may hold %-style placeholders
        :param args: values handed on unchanged to the underlying warning()
            call
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
127
0.64.67 by James Westby
Add support for -Dfast-import.
128
    def debug(self, mgs, *args):
        """Output a timestamped debug message if -Dfast-import was given.

        Nothing is emitted unless "fast-import" is present in
        debug.debug_flags.

        :param mgs: the message text, which may hold %-style placeholders.
            (The name is a long-standing misspelling of 'msg'; it is left
            as is so the signature stays unchanged.)
        :param args: values handed on unchanged to mutter()
        """
        if "fast-import" not in debug.debug_flags:
            return
        # The body previously read an undefined 'msg' here, which raised
        # UnboundLocalError whenever debugging was switched on.
        msg = "%s DEBUG: %s" % (self._time_of_day(), mgs)
        mutter(msg, *args)
133
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
134
    def _time_of_day(self):
135
        """Time of day as a string."""
136
        # Note: this is a separate method so tests can patch in a fixed value
137
        return time.strftime("%H:%M:%S")
0.64.67 by James Westby
Add support for -Dfast-import.
138
    
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
139
    def pre_process(self):
        """Set up caches, the revision loader and the repository for import.

        Loads the parameters, builds the cache manager, reads the id-map
        left by any earlier run, chooses the revision loader, switches off
        autopacking for pack repositories and opens the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        # Commits recorded in the id-map are skipped rather than reloaded
        already_loaded = self._init_id_map()
        self.skip_total = already_loaded
        if already_loaded:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", already_loaded)
        self._revision_count = 0

        # tag name -> revision_id
        self.tags = {}

        # Choose and build the revision loader used for committing
        if not self._experimental:
            loader_factory = revisionloader.ImportRevisionLoader
        else:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if not isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = None
        else:
            pack_collection = self.repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count

            def _pack_limit_during_import(total_revisions):
                # Always report a limit above the current revision total
                return total_revisions + 1

            pack_collection._max_pack_count = _pack_limit_during_import

        # Open a write group. It is committed at the end of the import;
        # checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
175
176
    def _load_info_and_params(self):
        """Read the processor parameters and the optional info (hints) file.

        Sets these attributes: _experimental, id_map_path, info,
        progress_every, checkpoint_every, inventory_cache_size, max_commits
        and total_commits.
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # The id-map location is hard-coded for now; it might become a
        # parameter one day if that's needed.
        control_transport = self.repo.control_files._transport
        self.id_map_path = control_transport.local_abspath(
            "fastimport-id-map")

        # Load the info file, if one was named
        info_path = params.get('info')
        if info_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(info_path)

        # How often to automatically report progress (not a parameter yet).
        # Verbose mode reports ten times as often.
        report_interval = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            report_interval = report_interval / 10
        self.progress_every = report_interval

        # How often to automatically checkpoint
        checkpoint_setting = params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT)
        self.checkpoint_every = int(checkpoint_setting)

        # How many inventories to cache
        cache_setting = params.get('inv-cache', _DEFAULT_INV_CACHE_SIZE)
        self.inventory_cache_size = int(cache_setting)

        # Maximum number of commits to import; None (also used for a
        # missing or negative count) means all of them.
        try:
            limit = int(params['count'])
        except KeyError:
            limit = None
        else:
            if limit < 0:
                limit = None
        self.max_commits = limit

        # The total used for progress tracking is held separately from the
        # stopping limit, in case the info file's commit count is outdated.
        if self.info is None:
            self.total_commits = limit
        else:
            expected = int(self.info['Command counts']['commit'])
            if limit is not None and expected > limit:
                expected = limit
            self.total_commits = expected
0.64.25 by Ian Clatworthy
slightly better progress reporting
223
0.64.27 by Ian Clatworthy
1st cut at performance tuning
224
    def _process(self, command_iter):
        """Run the base class processing, cleaning up the repo on failure.

        :param command_iter: passed straight through to
            processor.ImportProcessor._process
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, abort any open write group and then let
            # the original error carry on up to the caller.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
232
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
233
    def post_process(self):
        """Finish the import: commit, update branches/trees and report.

        In order, this commits the open write group, saves the id-map,
        updates the branches, warns about heads no branch was created for,
        optionally updates working trees (the 'trees' parameter), dumps
        statistics and, if autopacking was disabled in pre_process and more
        revisions than one checkpoint interval were processed, packs the
        repository and removes the obsolete packs.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # In verbose mode pass a change reporter to each update.
                # (This used to be bound to the name 'report', leaving
                # 'reporter' undefined and crashing verbose runs.)
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
300
301
    def _get_working_trees(self, branches):
        """Collect the working trees for the given branches.

        The processor's own branch maps to self.working_tree. Any other
        branch has its tree opened only when the repository reports that it
        makes working trees; a branch without one is warned about and
        left out.

        :param branches: the branches to find working trees for
        :return: a list of working trees
        """
        found = []
        repo_makes_trees = self.repo.make_working_trees()
        for branch in branches:
            if branch == self.branch and branch is not None:
                tree = self.working_tree
            elif not repo_makes_trees:
                continue
            else:
                try:
                    tree = branch.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", branch)
                    continue
            found.append(tree)
        return found
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
318
319
    def dump_stats(self):
        """Note how many revisions, branches and trees were handled.

        The revision figure excludes commits that were skipped because an
        earlier run had already loaded them.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(
            revisions, "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
329
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
330
    def _init_id_map(self):
        """Load the id-map and confirm it agrees with the repository.

        Only the sizes are compared for now; a stricter check that the
        revision-ids themselves match may be added in the future.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds a different
            number of revisions than the map knows about
        """
        id_map, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        in_repository = len(self.repo.all_revision_ids())
        if in_repository == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repository)
344
345
    def _save_id_map(self):
        """Write the commit-id -> revision-id map to the id-map file."""
        # The entire map is rewritten on every save. Should that prove a
        # problem, this can change to appending only the new entries.
        destination = self.id_map_path
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(destination, id_map)
350
0.64.5 by Ian Clatworthy
first cut at generic processing method
351
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        A marked blob is stored under the command's id; an unmarked one is
        stored under the SHA of its data.
        """
        if cmd.mark is None:
            key = osutils.sha_strings(cmd.data)
        else:
            key = cmd.id
        self.cache_mgr.store_blob(key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
358
359
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the command; it is not inspected here
        """
        repo = self.repo
        # Close off the current write group, record the id map, then open
        # a fresh write group for whatever comes next.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
365
366
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart (fewer commits seen than the
        id-map holds), the commit is only tracked and counted, not loaded.
        Otherwise the revision is committed, progress is reported and the
        import either stops (requested count reached) or checkpoints
        (every checkpoint_every commits).

        :raises BadRestart: if a commit that should be skipped is missing
            from the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
403
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
404
    def _gen_file_ids_cache(self):
        """Rebuild the path -> file-id cache from the head inventories.

        Only the revisions at the currently tracked heads are examined.
        """
        cache = self.cache_mgr
        # The interesting revisions are the heads
        head_revisions = [cache.revision_ids[head]
            for head in cache.heads.keys()]

        # Gather the file-ids, caching the inventories while we're at it
        path_to_file_id = {}
        for revision_id in head_revisions:
            inv = self.repo.revision_tree(revision_id).inventory
            cache.inventories[revision_id] = inv
            for path, entry in inv.iter_entries():
                path_to_file_id[path] = entry.file_id
        cache.file_ids = path_to_file_id
420
0.64.25 by Ian Clatworthy
slightly better progress reporting
421
    def report_progress(self, details=''):
        """Note the running commit count once every progress_every commits.

        When the total number of commits is known, the note also shows the
        total and, if one can be computed, an ETA.

        :param details: extra text appended to the end of the note
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if done % self.progress_every != 0:
            return
        total = self.total_commits
        if total is None:
            counts = "%d" % (done,)
            eta_str = ''
        else:
            counts = "%d/%d" % (done, total)
            eta = progress.get_eta(self._start_time, done, total)
            eta_str = progress.str_tdelta(eta)
            if eta_str.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
437
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
438
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
442
443
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ defines a tag; a reset of any
        other ref is not supported yet and is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)
            return
        tag_name = cmd.ref[len(tag_prefix):]
        self._set_tag(tag_name, cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
450
451
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name = cmd.id
        self._set_tag(tag_name, cmd.from_)
454
455
    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name; decoded as UTF-8, with undecodable
            bytes replaced
        :param from_: the import reference, looked up in the
            commit-id -> revision-id map
        """
        tag_name = name.decode('utf-8', 'replace')
        self.tags[tag_name] = self.cache_mgr.revision_ids[from_]
0.64.5 by Ian Clatworthy
first cut at generic processing method
460
461
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
462
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor.

    It holds the blob store, an LRU cache of inventories, the lookup
    tables for revision-ids and file-ids, and the head tracking state.
    """

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: flag stored on the manager as self.verbose
        :param inventory_cache_size: size passed to the LRU cache that
            holds inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no usage hints are available, when
        it is empty, or when the hints list its id; otherwise it is kept
        only until it is fetched once.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay available; any other blob is removed as it is
        returned. A KeyError is raised if the id is in neither store.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        A KeyError is raised if no file-id is recorded for old_path.
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Popping before assigning keeps the entry when both paths are the
        # same; assigning and then deleting would drop it altogether.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
531
532
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
533
def _track_heads(cmd, cache_mgr):
534
    """Track the repository heads given a CommitCommand.
535
    
536
    :return: the list of parents in terms of commit-ids
537
    """
538
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
539
    if cmd.from_ is not None:
540
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
541
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
542
        last_id = cache_mgr.last_ids.get(cmd.ref)
543
        if last_id is not None:
544
            parents = [last_id]
545
        else:
546
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
547
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
548
    # Track the heads
549
    for parent in parents:
550
        try:
551
            del cache_mgr.heads[parent]
552
        except KeyError:
553
            # it's ok if the parent isn't there - another
554
            # commit may have already removed it
555
            pass
556
    cache_mgr.heads[cmd.id] = cmd.ref
557
    cache_mgr.last_ids[cmd.ref] = cmd.id
558
    cache_mgr.last_ref = cmd.ref
559
    return parents
560
561
0.64.5 by Ian Clatworthy
first cut at generic processing method
562
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that turns a CommitCommand into a Bazaar revision.

    The file commands of a commit update a copy of the first parent's
    inventory. Once they have all been handled, the revision and the
    inventory are handed to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit.

        :param command: the CommitCommand to process
        :param repo: the repository; used to rebuild inventories that are
            not in the cache and to check for rich-root support
        :param cache_mgr: the cache manager holding the inventories,
            revision-ids, file-ids and blobs
        :param loader: the object whose load() method stores the revision
        :param verbose: if True, inventory cache misses are reported
        :param _experimental: flag stored on the handler as
            self._experimental
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Map the parents from commit-ids to revision-ids
        revision_ids = self.cache_mgr.revision_ids
        self.parents = [revision_ids[p] for p in parents]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change the item at a path in the inventory being built.

        The content comes from the blob cache when the command refers to a
        blob, otherwise from the data carried by the command itself.
        """
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove the item at a path from the inventory being built.

        A delete of something that is not in the inventory is ignored,
        with a warning unless _warn_unless_in_merges() decides otherwise.
        """
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        else:
            # If a directory was just removed, forget the entry cached for
            # it so that _ensure_directory() does not hand out a parent
            # that is no longer in the inventory. Only the entry that was
            # actually deleted is dropped.
            cached = self.directory_entries.get(path)
            if cached is not None and cached.file_id == fileid:
                del self.directory_entries[path]
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about an ignored delete unless a merge parent has the file.

        Nothing is reported when the commit has at most one parent.
        """
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Copying is not supported: raises NotImplementedError."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Rename an item in the inventory being built.

        The directory containing the new path is created if needed, and
        anything already at the new path is removed first.
        """
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Deleting everything is not supported: raises NotImplementedError."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            pass
        file_id = generate_ids.gen_file_id(path)
        self.cache_mgr.file_ids[path] = file_id
        self.debug("Generated new file id %s for '%s'", file_id, path)
        return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        An inventory that is not in the cache is taken from the
        repository's revision tree and then cached.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids found in the cache or the repository
          inventories = one inventory per requested revision-id; for a
            revision that is not present, it is the inventory of
            revision_tree(None)
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of inventory entry to make; only kinds that
            produce file and symlink entries are supported
        :param is_executable: the executable flag to set on a file
        :param data: the file content, or the target of a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        Missing directories are created, parents first, and added to the
        inventory.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
852
853
0.64.34 by Ian Clatworthy
report lost branches
854
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
855
0.64.64 by Ian Clatworthy
save tags known about in each branch
856
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
857
        """Create an object responsible for updating branches.
858
859
        :param heads_by_ref: a dictionary where
860
          names are git-style references like refs/heads/master;
861
          values are one item lists of commits marks.
862
        """
0.64.37 by Ian Clatworthy
create branches as required
863
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
864
        self.branch = branch
865
        self.cache_mgr = cache_mgr
866
        self.heads_by_ref = heads_by_ref
867
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
868
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
869
870
    def update(self):
871
        """Update the Bazaar branches and tips matching the heads.
872
873
        If the repository is shared, this routine creates branches
874
        as required. If it isn't, warnings are produced about the
875
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
876
0.64.34 by Ian Clatworthy
report lost branches
877
        :return: updated, lost_heads where
878
          updated = the list of branches updated
879
          lost_heads = a list of (bazaar-name,revision) for branches that
880
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
881
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
882
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
883
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
884
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
885
            if self._update_branch(br, tip):
886
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
887
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
888
889
    def _get_matching_branches(self):
890
        """Get the Bazaar branches.
891
0.64.34 by Ian Clatworthy
report lost branches
892
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
893
          default_tip = the last commit mark for the default branch
894
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
895
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
896
            would have been created had the repository been shared and
897
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
898
        """
0.64.37 by Ian Clatworthy
create branches as required
899
        branch_tips = []
900
        lost_heads = []
901
        ref_names = self.heads_by_ref.keys()
902
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
903
            trunk = self.select_trunk(ref_names)
904
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
905
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
906
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
907
908
        # Convert the reference names into Bazaar speak
909
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
910
0.64.37 by Ian Clatworthy
create branches as required
911
        # Policy for locating branches
912
        def dir_under_current(name, ref_name):
913
            # Using the Bazaar name, get a directory under the current one
914
            return name
915
        def dir_sister_branch(name, ref_name):
916
            # Using the Bazaar name, get a sister directory to the branch
917
            return osutils.pathjoin(self.branch.base, "..", name)
918
        if self.branch is not None:
919
            dir_policy = dir_sister_branch
920
        else:
921
            dir_policy = dir_under_current
922
0.64.34 by Ian Clatworthy
report lost branches
923
        # Create/track missing branches
924
        shared_repo = self.repo.is_shared()
925
        for name in sorted(bzr_names.keys()):
926
            ref_name = bzr_names[name]
927
            tip = self.heads_by_ref[ref_name][0]
928
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
929
                location = dir_policy(name, ref_name)
930
                try:
931
                    br = self.make_branch(location)
932
                    branch_tips.append((br,tip))
933
                    continue
934
                except errors.BzrError, ex:
935
                    error("ERROR: failed to create branch %s: %s",
936
                        location, ex)
937
            lost_head = self.cache_mgr.revision_ids[tip]
938
            lost_info = (name, lost_head)
939
            lost_heads.append(lost_info)
940
        return branch_tips, lost_heads
941
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
942
    def select_trunk(self, ref_names):
943
        """Given a set of ref names, choose one as the trunk."""
944
        for candidate in ['refs/heads/master']:
945
            if candidate in ref_names:
946
                return candidate
947
        # Use the last reference in the import stream
948
        return self.last_ref
949
0.64.37 by Ian Clatworthy
create branches as required
950
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch is, or is to be created
        :return: the branch opened at location, or a newly created one
          when opening fails with NotBranchError
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # Nothing to open at that location, so create the branch
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
956
957
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
958
        """Generate Bazaar branch names from import ref names.
959
        
960
        :return: a dictionary with Bazaar names as keys and
961
          the original reference names as values.
962
        """
0.64.34 by Ian Clatworthy
report lost branches
963
        bazaar_names = {}
964
        for ref_name in sorted(ref_names):
965
            parts = ref_name.split('/')
966
            if parts[0] == 'refs':
967
                parts.pop(0)
968
            full_name = "--".join(parts)
969
            bazaar_name = parts[-1]
970
            if bazaar_name in bazaar_names:
971
                bazaar_name = full_name
972
            bazaar_names[bazaar_name] = ref_name
973
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
974
975
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
976
        """Update a branch with last revision and tag information.
977
        
978
        :return: whether the branch was changed or not
979
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
980
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
981
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
982
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
983
        existing_revno, existing_last_rev_id = br.last_revision_info()
984
        changed = False
985
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
986
            br.set_last_revision_info(revno, last_rev_id)
987
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
988
        # apply tags known in this branch
989
        my_tags = {}
990
        if self.tags:
991
            for tag,rev in self.tags.items():
992
                if rev in revs:
993
                    my_tags[tag] = rev
994
            if my_tags:
995
                br.tags._set_tag_dict(my_tags)
996
                changed = True
997
        if changed:
998
            tagno = len(my_tags)
999
            note("\t branch %s now has %d %s and %d %s", br.nick,
1000
                revno, helpers.single_plural(revno, "revision", "revisions"),
1001
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1002
        return changed