/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
37
from bzrlib.trace import (
38
    note,
39
    warning,
0.64.37 by Ian Clatworthy
create branches as required
40
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
42
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
43
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
44
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
45
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
47
    processor,
48
    revisionloader,
49
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
50
51
0.64.41 by Ian Clatworthy
update multiple working trees if requested
52
# How many commits before automatically reporting progress
53
_DEFAULT_AUTO_PROGRESS = 1000
54
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
55
# How many commits before automatically checkpointing
56
_DEFAULT_AUTO_CHECKPOINT = 10000
57
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
58
# How many inventories to cache
59
_DEFAULT_INV_CACHE_SIZE = 10
60
0.64.41 by Ian Clatworthy
update multiple working trees if requested
61
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
62
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 turns automatic
      checkpointing off.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'experimental',
        ]

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Prepare the caches, id-map, revision loader and write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if self._experimental:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        else:
            loader_factory = revisionloader.ImportRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        inv_vf = self.repo.get_inventory_weave()
        inv_vf.enable_cache()

    def _load_info_and_params(self):
        """Read self.params and the optional info file into attributes.

        Sets _experimental, id_map_path, info, progress_every,
        checkpoint_every, inventory_cache_size, max_commits and
        total_commits.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet). Verbose mode reports ten times as often;
        # floor division keeps the interval an integer.
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        Whatever is raised is always re-raised after the clean-up.
        """
        # if anything goes wrong, abort the write group if any.
        # The bare except is deliberate: an interruption must also
        # abort the write group, and the exception is re-raised.
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit outstanding work, update branches/trees and report."""
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound in both branches because
                # it is passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository."""
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        """Output a timestamped summary of what was imported and updated."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        # Commits skipped on a restart were counted but not imported
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count != known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint.
        # A checkpoint interval of 0 disables automatic checkpoints
        # rather than failing on a modulo by zero.
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self):
        """Generate the file-id cache by searching repository inventories.
        """
        # Get the interesting revisions - the heads
        revision_ids = [self.cache_mgr.revision_ids[h]
            for h in self.cache_mgr.heads]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        """Output a progress note every self.progress_every commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
449
450
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
451
class GenericCacheManager(object):
    """Holder of the in-memory caches used by the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the instance unchanged
        :param inventory_cache_size: size given to the inventory LRU cache
        """
        self.verbose = verbose

        # Blob stores, both keyed by dataref (either :mark or the sha-1).
        # Entries in the sticky store survive being fetched; entries in
        # the plain store are dropped on first fetch.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory. Inventories are large and most
        # parents are recent in history, so only a few are retained.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-id -> revision-id. Every entry is kept; they
        # are small.
        self.revision_ids = {}

        # path -> file-id, as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Which blobs to make sticky - None means all of them
        keep = None
        if info is not None:
            try:
                keep = info['Blob usage tracking']['multi']
            except KeyError:
                # section not in the file - possible when no blobs used
                pass
        self._blobs_to_keep = keep

    def store_blob(self, id, data):
        """Store a blob of data, choosing the sticky or one-shot store."""
        keep_all = self._blobs_to_keep is None
        if keep_all or data == '' or id in self._blobs_to_keep:
            target = self._sticky_blobs
        else:
            target = self._blobs
        target[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data; non-sticky blobs are removed when fetched."""
        if id in self._sticky_blobs:
            return self._sticky_blobs[id]
        return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # Intentionally a no-op: the file-id given to a path is
        # remembered even after that file is deleted.
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # The old entry is kept on purpose; after a rename both paths
        # map to the same file-id and no information is discarded.
        known_file_id = self.file_ids[old_path]
        self.file_ids[new_path] = known_file_id
520
521
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
522
def _track_heads(cmd, cache_mgr):
    """Update the head bookkeeping in cache_mgr for a CommitCommand.

    Every parent of the commit stops being a head and the commit itself
    becomes one. The commit is also recorded as the latest commit on its
    ref, and that ref as the last ref seen.

    :param cmd: the commit; its ``parents``, ``ref`` and ``id`` are read
    :param cache_mgr: holder of the ``heads`` and ``last_ids`` mappings
      and of the ``last_ref`` attribute, all of which are updated
    :return: the list of parents in terms of commit-ids
    """
    ref = cmd.ref
    parents = cmd.parents
    if not parents:
        # No explicit parents: the commit implicitly follows the
        # previous commit on the same ref, if there was one.
        previous = cache_mgr.last_ids.get(ref)
        if previous is None:
            parents = []
        else:
            parents = [previous]

    heads = cache_mgr.heads
    for parent in parents:
        # A parent may already be gone - another commit may have
        # removed it earlier - so a missing key is fine.
        heads.pop(parent, None)
    heads[cmd.id] = ref
    cache_mgr.last_ids[ref] = cmd.id
    cache_mgr.last_ref = ref
    return parents
548
549
0.64.5 by Ian Clatworthy
first cut at generic processing method
550
class GenericCommitHandler(processor.CommitHandler):
    """Build a Bazaar revision from one commit command.

    The file commands of the commit are applied to an inventory seeded
    from the first parent; the finished revision and inventory are then
    passed to the revision loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Remember the collaborators used while processing the commit.

        :param command: the commit command being handled
        :param repo: repository queried for rich-root support, revision
          trees and revision presence
        :param cache_mgr: the shared caches (file-ids, inventories,
          revision-ids, blobs, heads)
        :param loader: object whose load() stores the finished revision
        :param verbose: if True, inventory cache misses are noted
        :param _experimental: stored only; not read within this class
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note with the id of the current commit appended."""
        note("%s (%s)" % (msg, self.command.id), *args)

    def warning(self, msg, *args):
        """Output a warning with the id of the current commit appended."""
        warning("WARNING: %s (%s)" % (msg, self.command.id), *args)

    def pre_process_files(self):
        """Set up the per-commit state before file commands are handled."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # file-id -> lines, for the texts touched by this commit
        self.lines_for_commit = {}

        # Updating the head tracking also gives the effective parents as
        # commit-ids; translate those into Bazaar revision-ids. No
        # parents simply gives an empty list.
        commit_ids = _track_heads(self.command, self.cache_mgr)
        self.parents = [self.cache_mgr.revision_ids[commit_id]
            for commit_id in commit_ids]

        # Start from the first parent's inventory when there is one
        if self.parents:
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for the current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Finish the inventory and load the revision into the repository."""
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])

        # The author is only recorded when it differs from the committer
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        message = self._escape_commit_message(self.command.message)
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=message,
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Escape the characters of a message that XML cannot represent.

        Each run of characters outside the XML Char production
        (http://www.w3.org/TR/REC-xml/#NT-Char) is replaced by its
        'unicode_escape' encoding.
        """
        # It's unfortunate that this is needed at this level (but it is).
        # Code copied from bzrlib.commit.
        def escape_run(match):
            return match.group(0).encode('unicode_escape')
        escaped, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            escape_run,
            message)
        return escaped

    def modify_handler(self, filecmd):
        """Handle a file-modify command, fetching blob data if referenced."""
        if filecmd.dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Handle a file-delete command.

        Deleting a path that is not in the inventory only gives a warning.
        """
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except (KeyError, errors.NoSuchId):
            self.warning("ignoring delete of %s as not in inventory", path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def copy_handler(self, filecmd):
        """File-copy commands are not supported."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Handle a file-rename command by queuing an inventory delta."""
        source = filecmd.old_path
        target = filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        self.cache_mgr._rename_path(source, target)

    def deleteall_handler(self, filecmd):
        """Delete-all commands are not supported."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path not seen before gets a newly generated file-id, which is
        remembered in the cache manager.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            file_id = known_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            return file_id, True
        return file_id, False

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path, creating one if needed."""
        file_id, _ = self.bzr_file_id_and_new(path)
        return file_id

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id from the committer and commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        name, email, timestamp = self.command.committer[:3]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id, using the cache if possible."""
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from the repository
        inv = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids found in the cache or repository
          inventories = one inventory per requested revision-id; for an
            absent revision it is the inventory of revision_tree(None)
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from the repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            else:
                # An inventory in the cache is assumed to have been
                # successfully loaded into the repository
                present.append(revision_id)
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines recorded during this commit for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raise errors.BzrError: if the entry made for ``kind`` is
          neither a file nor a symlink
        """
        # The parent directory is ensured before the file-id is looked
        # up, so any new ids are generated in that order.
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum(len(line) for line in lines)
            self.lines_for_commit[file_id] = lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            # NOTE(review): only the by-id index is replaced here -
            # confirm nothing else in the inventory still references the
            # old entry.
            self.inventory._byid[file_id] = entry

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parents first.

        :return: basename, parent_ie where parent_ie is the inventory
          entry of the directory containing path
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # The directory is unknown: make sure its own parent exists,
        # then create it.
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
813
814
0.64.34 by Ian Clatworthy
report lost branches
815
class GenericBranchUpdater(object):
    """Point Bazaar branches at the heads found in an import stream."""

    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
        """Create an object responsible for updating branches.

        :param repo: the repository holding the imported revisions
        :param branch: the branch to use as the trunk, or None
        :param cache_mgr: cache manager; its ``revision_ids`` mapping is
          used to turn commit marks into revision-ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one-item lists of commit marks.
        :param last_ref: the last reference seen in the import stream
        """
        self.repo = repo
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, the heads that could not be given a
        branch are reported back as lost information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        updated = []
        branch_tips, lost_heads = self._get_matching_branches()
        for br, tip in branch_tips:
            if self._update_branch(br, tip):
                updated.append(br)
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch,tip) tuples, starting with the
            trunk branch and its tip when a trunk branch was given
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        # Imported locally: only the error path below needs it.
        import sys

        branch_tips = []
        lost_heads = []
        # An explicit copy: the trunk is removed from this list below
        # and heads_by_ref itself must stay untouched.
        ref_names = list(self.heads_by_ref.keys())
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name
        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)
        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names.keys()):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                except errors.BzrError:
                    # sys.exc_info() is used instead of binding the
                    # exception in the except clause so that this parses
                    # on Python 2.4 as well as on later versions.
                    error("ERROR: failed to create branch %s: %s",
                        location, sys.exc_info()[1])
                else:
                    branch_tips.append((br, tip))
                    continue
            # Not shared, or the branch could not be made: the head is lost
            lost_head = self.cache_mgr.revision_ids[tip]
            lost_heads.append((name, lost_head))
        return branch_tips, lost_heads

    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        refs/heads/master is preferred when present; otherwise the last
        reference seen in the import stream is used.
        """
        for candidate in ['refs/heads/master']:
            if candidate in ref_names:
                return candidate
        # Use the last reference in the import stream
        return self.last_ref

    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :return: the existing branch at location, or a new one
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            return bzrdir.BzrDir.create_branch_convenience(location)

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Generate Bazaar branch names from import ref names.

        The last path component of a reference is used as the name. When
        that name is already taken, the whole reference (minus a leading
        'refs') joined with '--' is used instead. References are handled
        in sorted order, so the outcome does not depend on input order.

        :return: a dictionary with Bazaar names as keys and
          the original reference names as values.
        """
        bazaar_names = {}
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            full_name = "--".join(parts)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                bazaar_name = full_name
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the new tip
        :return: whether the branch was changed or not
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the length of the history leading to the tip
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
        existing_revno, existing_last_rev_id = br.last_revision_info()
        changed = False
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
            br.set_last_revision_info(revno, last_rev_id)
            changed = True
            note("\t branch %s now has %d revisions", br.nick, revno)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        return changed