/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
37
from bzrlib.trace import (
38
    note,
39
    warning,
0.64.37 by Ian Clatworthy
create branches as required
40
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
42
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
43
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
44
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
45
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
47
    processor,
48
    revisionloader,
49
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
50
51
0.64.41 by Ian Clatworthy
update multiple working trees if requested
52
# How many commits before automatically reporting progress
_DEFAULT_AUTO_PROGRESS = 1000

# How many commits before automatically checkpointing
_DEFAULT_AUTO_CHECKPOINT = 10000

# How many inventories to cache
_DEFAULT_INV_CACHE_SIZE = 10
60
0.64.41 by Ian Clatworthy
update multiple working trees if requested
61
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
62
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 disables automatic
      checkpointing.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'experimental',
        ]

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Initialise timing, parameters, caches, the loader and write group.

        Loads the id-map left behind by any earlier run so that commits
        already imported can be skipped, picks the revision loader,
        disables autopacking on pack repositories and opens the first
        write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if self._experimental:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        else:
            loader_factory = revisionloader.ImportRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        inv_vf = self.repo.get_inventory_weave()
        inv_vf.enable_cache()

    def _load_info_and_params(self):
        """Read the processor parameters and the optional info (hints) file.

        Sets ``_experimental``, ``id_map_path``, ``info``,
        ``progress_every``, ``checkpoint_every``, ``inventory_cache_size``,
        ``max_commits`` and ``total_commits`` on this object.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Explicit integer division: this value is used as a modulus
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed through unchanged to
            processor.ImportProcessor._process
        """
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately a bare except: the write group must also be
            # aborted on KeyboardInterrupt etc. The error is always re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit outstanding work, update branches/trees and report stats."""
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound to the same name on both
                # branches; it is passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to look up working trees for
        :return: a list of working trees; branches without one are
            skipped (with a warning when a tree was expected)
        """
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        """Output a timestamped summary of what was imported and updated."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        # Commits skipped on restart are counted but were not imported now
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        :raises plugin_errors.BadRepositorySize: if the number of
            revisions in the repository differs from the map size
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count != known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Marked blobs are keyed by their id; unmarked ones by content sha
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: not used; None is passed for automatic checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart, commits already present in the
        id-map are only tracked, not re-imported.

        :raises plugin_errors.BadRestart: if a commit expected to have
            been loaded already is missing from the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint interval of 0 means "never" rather than a
            # ZeroDivisionError on the first commit.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self):
        """Generate the file-id cache by searching repository inventories.
        """
        # Get the interesting revisions - the heads
        head_ids = self.cache_mgr.heads.keys()
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        """Output a progress note every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        # Only resets of tag refs are acted on; they define a tag
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
445
446
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
447
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as ``verbose``
        :param inventory_cache_size: maximum number of inventories held
            in the LRU inventory cache
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky (kept after being fetched) when no hints
        are available, when it is empty, or when its id is listed in the
        blobs to keep; otherwise it is stored for a single fetch.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs are returned and kept; other blobs are removed from
        the cache as they are returned.

        :raises KeyError: if no blob is stored under id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
516
517
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
518
def _track_heads(cmd, cache_mgr):
519
    """Track the repository heads given a CommitCommand.
520
    
521
    :return: the list of parents in terms of commit-ids
522
    """
523
    # Get the true set of parents
524
    if cmd.mark is None:
525
        last_id = cache_mgr.last_ids.get(cmd.ref)
526
        if last_id is not None:
527
            parents = [last_id]
528
        else:
529
            parents = []
530
    else:
531
        parents = cmd.parents
532
    # Track the heads
533
    for parent in parents:
534
        try:
535
            del cache_mgr.heads[parent]
536
        except KeyError:
537
            # it's ok if the parent isn't there - another
538
            # commit may have already removed it
539
            pass
540
    cache_mgr.heads[cmd.id] = cmd.ref
541
    cache_mgr.last_ids[cmd.ref] = cmd.id
542
    cache_mgr.last_ref = cmd.ref
543
    return parents
544
545
0.64.5 by Ian Clatworthy
first cut at generic processing method
546
class GenericCommitHandler(processor.CommitHandler):
547
0.64.48 by Ian Clatworthy
one revision loader instance
548
    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed on to the base class
        :param repo: the repository used to look up inventories
        :param cache_mgr: the cache manager providing revision ids,
          file ids, inventories and blobs
        :param loader: the object whose load() method saves the revision
        :param verbose: if True, inventory cache misses are reported
        :param _experimental: stored as-is; not used by the methods
          visible in this part of the file
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self._experimental = _experimental
        self.loader = loader
        self.cache_mgr = cache_mgr
        self.repo = repo
0.64.5 by Ian Clatworthy
first cut at generic processing method
556
0.64.43 by Ian Clatworthy
verbose mode cleanup
557
    def note(self, msg, *args):
        """Output a note, suffixed with the id of the current command.

        :param msg: the message, which may contain % placeholders
        :param args: the values for the placeholders in msg
        """
        context = self.command.id
        note("%s (%s)" % (msg, context), *args)
561
562
    def warning(self, msg, *args):
        """Output a warning, suffixed with the id of the current command.

        :param msg: the message, which may contain % placeholders
        :param args: the values for the placeholders in msg
        """
        context = self.command.id
        warning("WARNING: %s (%s)" % (msg, context), *args)
566
0.64.5 by Ian Clatworthy
first cut at generic processing method
567
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resets the per-commit state, works
        out the parent revisions and seeds the working inventory.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and translate the real parent list (commit
        # ids) into revision ids
        parents = _track_heads(self.command, self.cache_mgr)
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]

        # Seed the inventory from the first parent, if there is one
        if self.parents:
            # the inventory cache is keyed by Bazaar revision id
            parent_inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = parent_inv.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for the current inventory
        self.directory_entries = dict(self.inventory.directories())
600
0.64.14 by Ian Clatworthy
commit of modified files working
601
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and hands the new revision to the loader.
        """
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # The author is only recorded when it differs from the committer
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        # Load the revision into the repository
        message = self._escape_commit_message(self.command.message)
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=message,
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
626
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
627
    def _escape_commit_message(self, message):
628
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
629
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
630
        # Code copied from bzrlib.commit.
631
        
632
        # Python strings can include characters that can't be
633
        # represented in well-formed XML; escape characters that
634
        # aren't listed in the XML specification
635
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
636
        message, _ = re.subn(
637
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
638
            lambda match: match.group(0).encode('unicode_escape'),
639
            message)
640
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
641
642
    def modify_handler(self, filecmd):
        """Handle a file modification command.

        The content comes from the blob cache when the command refers
        to a blob, otherwise from the data carried by the command.
        """
        blob_ref = filecmd.dataref
        if blob_ref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(blob_ref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
649
650
    def delete_handler(self, filecmd):
        """Handle a file deletion command.

        The entry is removed from the working inventory. A path that is
        not in the inventory only produces a warning.
        """
        path = filecmd.path
        try:
            file_id = self.bzr_file_id(path)
            del self.inventory[file_id]
        except (KeyError, errors.NoSuchId):
            self.warning("ignoring delete of %s as not in inventory", path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            # nothing cached for this path
            pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
662
663
    def copy_handler(self, filecmd):
        """Reject file copy commands, which are not implemented."""
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
665
666
    def rename_handler(self, filecmd):
        """Handle a file rename command.

        Queues an (old_path, new_path, file_id, entry) item on the
        inventory delta and records the rename in the path cache.
        """
        old_path, new_path = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        entry = self.inventory[file_id]
        delta_item = (old_path, new_path, file_id, entry)
        self.inv_delta.append(delta_item)
        self.cache_mgr._rename_path(old_path, new_path)
0.64.5 by Ian Clatworthy
first cut at generic processing method
673
674
    def deleteall_handler(self, filecmd):
        """Reject delete-all commands, which are not implemented."""
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
676
0.64.16 by Ian Clatworthy
safe processing tweaks
677
    def bzr_file_id_and_new(self, path):
        """Look up, or create, the Bazaar file identifier for a path.

        A path not yet in the cache is given a freshly generated file-id,
        which is remembered for later lookups.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        file_ids = self.cache_mgr.file_ids
        try:
            file_id = file_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            file_ids[path] = file_id
            return file_id, True
        return file_id, False
689
0.64.5 by Ian Clatworthy
first cut at generic processing method
690
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        This is bzr_file_id_and_new() without the is_new flag.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
693
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
694
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision.

        :return: a new Inventory created with the current revision id
        """
        return inventory.Inventory(revision_id=self.revision_id)
698
0.64.5 by Ian Clatworthy
first cut at generic processing method
699
    def gen_revision_id(self):
        """Generate a revision id from the committer and commit time.

        Subclasses may override this to produce deterministic ids say.
        """
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        name, email, timestamp = self.command.committer[:3]
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, timestamp)
710
0.64.7 by Ian Clatworthy
start of multiple commit handling
711
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first. On a miss the inventory is
        taken from the repository's revision tree and then cached.
        """
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        found = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = found
        return found
722
0.64.5 by Ian Clatworthy
first cut at generic processing method
723
    def _get_inventories(self, revision_ids):
724
        """Get the inventories for revision-ids.
725
        
726
        This is a callback used by the RepositoryLoader to
727
        speed up inventory reconstruction."""
728
        present = []
729
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
730
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
731
        # successfully loaded into the repsoitory
732
        for revision_id in revision_ids:
733
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
734
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
735
                present.append(revision_id)
736
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
737
                if self.verbose:
738
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
739
                # Not cached so reconstruct from repository
740
                if self.repo.has_revision(revision_id):
741
                    rev_tree = self.repo.revision_tree(revision_id)
742
                    present.append(revision_id)
743
                else:
744
                    rev_tree = self.repo.revision_tree(None)
745
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
746
                self.cache_mgr.inventories[revision_id] = inv
747
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
748
        return present, inventories
749
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
750
    def _get_lines(self, file_id):
751
        """Get the lines for a file-id."""
752
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
753
754
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of inventory entry to make
        :param is_executable: the executable flag, used for files only
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the entry made for kind is neither
          a file nor a symlink
        """
        # Build the new entry, creating parent directories as needed
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum(len(line) for line in lines)
            # keep the text so _get_lines can supply it at load time
            self.lines_for_commit[file_id] = lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            message = "Cannot import items of kind '%s' yet" % (kind,)
            raise errors.BzrError(message)

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
779
780
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parent first, and
        recorded in both the inventory and the directory_entries cache.

        :return: basename, parent_ie where basename is the last
          component of path and parent_ie is the inventory entry of
          its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # top-level items hang off the root, which doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # not known yet, so it gets created below
            pass

        # Create the directory, making sure its own parent exists first
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename,
            parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by _get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
809
810
0.64.34 by Ian Clatworthy
report lost branches
811
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
812
0.64.37 by Ian Clatworthy
create branches as required
813
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
814
        """Create an object responsible for updating branches.
815
816
        :param heads_by_ref: a dictionary where
817
          names are git-style references like refs/heads/master;
818
          values are one item lists of commits marks.
819
        """
0.64.37 by Ian Clatworthy
create branches as required
820
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
821
        self.branch = branch
822
        self.cache_mgr = cache_mgr
823
        self.heads_by_ref = heads_by_ref
824
        self.last_ref = last_ref
825
826
    def update(self):
827
        """Update the Bazaar branches and tips matching the heads.
828
829
        If the repository is shared, this routine creates branches
830
        as required. If it isn't, warnings are produced about the
831
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
832
0.64.34 by Ian Clatworthy
report lost branches
833
        :return: updated, lost_heads where
834
          updated = the list of branches updated
835
          lost_heads = a list of (bazaar-name,revision) for branches that
836
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
837
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
838
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
839
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
840
        for br, tip in branch_tips:
841
            self._update_branch(br, tip)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
842
            updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
843
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
844
845
    def _get_matching_branches(self):
846
        """Get the Bazaar branches.
847
0.64.34 by Ian Clatworthy
report lost branches
848
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
849
          default_tip = the last commit mark for the default branch
850
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
851
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
852
            would have been created had the repository been shared and
853
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
854
        """
0.64.37 by Ian Clatworthy
create branches as required
855
        branch_tips = []
856
        lost_heads = []
857
        ref_names = self.heads_by_ref.keys()
858
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
859
            trunk = self.select_trunk(ref_names)
860
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
861
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
862
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
863
864
        # Convert the reference names into Bazaar speak
865
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
866
0.64.37 by Ian Clatworthy
create branches as required
867
        # Policy for locating branches
868
        def dir_under_current(name, ref_name):
869
            # Using the Bazaar name, get a directory under the current one
870
            return name
871
        def dir_sister_branch(name, ref_name):
872
            # Using the Bazaar name, get a sister directory to the branch
873
            return osutils.pathjoin(self.branch.base, "..", name)
874
        if self.branch is not None:
875
            dir_policy = dir_sister_branch
876
        else:
877
            dir_policy = dir_under_current
878
0.64.34 by Ian Clatworthy
report lost branches
879
        # Create/track missing branches
880
        shared_repo = self.repo.is_shared()
881
        for name in sorted(bzr_names.keys()):
882
            ref_name = bzr_names[name]
883
            tip = self.heads_by_ref[ref_name][0]
884
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
885
                location = dir_policy(name, ref_name)
886
                try:
887
                    br = self.make_branch(location)
888
                    branch_tips.append((br,tip))
889
                    continue
890
                except errors.BzrError, ex:
891
                    error("ERROR: failed to create branch %s: %s",
892
                        location, ex)
893
            lost_head = self.cache_mgr.revision_ids[tip]
894
            lost_info = (name, lost_head)
895
            lost_heads.append(lost_info)
896
        return branch_tips, lost_heads
897
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
898
    def select_trunk(self, ref_names):
899
        """Given a set of ref names, choose one as the trunk."""
900
        for candidate in ['refs/heads/master']:
901
            if candidate in ref_names:
902
                return candidate
903
        # Use the last reference in the import stream
904
        return self.last_ref
905
0.64.37 by Ian Clatworthy
create branches as required
906
    def make_branch(self, location):
        """Create a branch at the given location.

        :return: the result of BzrDir.create_branch_convenience()
        """
        create = bzrdir.BzrDir.create_branch_convenience
        return create(location)
0.64.34 by Ian Clatworthy
report lost branches
909
910
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
911
        """Generate Bazaar branch names from import ref names.
912
        
913
        :return: a dictionary with Bazaar names as keys and
914
          the original reference names as values.
915
        """
0.64.34 by Ian Clatworthy
report lost branches
916
        bazaar_names = {}
917
        for ref_name in sorted(ref_names):
918
            parts = ref_name.split('/')
919
            if parts[0] == 'refs':
920
                parts.pop(0)
921
            full_name = "--".join(parts)
922
            bazaar_name = parts[-1]
923
            if bazaar_name in bazaar_names:
924
                bazaar_name = full_name
925
            bazaar_names[bazaar_name] = ref_name
926
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
927
928
    def _update_branch(self, br, last_mark):
929
        """Update a branch with last revision and tag information."""
930
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
931
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
932
        br.set_last_revision_info(revno, last_rev_id)
933
        # TODO: apply tags known in this branch
934
        #if self.tags:
935
        #    br.tags._set_tag_dict(self.tags)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
936
        note("\t branch %s has %d revisions", br.nick, revno)