/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
37
from bzrlib.trace import (
38
    note,
39
    warning,
0.64.37 by Ian Clatworthy
create branches as required
40
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
42
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
43
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
44
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
45
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
47
    processor,
48
    revisionloader,
49
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
50
51
0.64.41 by Ian Clatworthy
update multiple working trees if requested
52
# How many commits before automatically reporting progress
53
_DEFAULT_AUTO_PROGRESS = 1000
54
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
55
# How many commits before automatically checkpointing
56
_DEFAULT_AUTO_CHECKPOINT = 10000
57
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
58
# How many inventories to cache
59
_DEFAULT_INV_CACHE_SIZE = 10
60
0.64.41 by Ian Clatworthy
update multiple working trees if requested
61
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
62
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 disables automatic
      checkpointing.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'experimental',
        ]

    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message (may contain %-style placeholders)
        :param args: values for the placeholders, passed through to
            the module-level note()
        """
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message (may contain %-style placeholders)
        :param args: values for the placeholders, passed through to
            the module-level warning()
        """
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Set up the state used while processing the import stream.

        Loads the parameters and hints, creates the cache manager and
        revision loader, loads the id-map left by a previous run (if
        any), disables autopacking where possible and opens the first
        write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if self._experimental:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        else:
            loader_factory = revisionloader.ImportRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        inv_vf = self.repo.get_inventory_weave()
        inv_vf.enable_cache()

    def _load_info_and_params(self):
        """Read self.params and the optional hints file into attributes.

        Sets _experimental, id_map_path, info, progress_every,
        checkpoint_every, inventory_cache_size, max_commits and
        total_commits.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps this an integer regardless of the
            # division semantics in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed through unchanged to
            processor.ImportProcessor._process
        """
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # The bare except is deliberate: whatever was raised, the
            # open write group is aborted and the exception re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Finish the import.

        Commits the current write group, saves the id-map, updates the
        branches, optionally updates the working trees, dumps the
        statistics and tells the user what to do next.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The name bound here must be the one passed to
                # wt.update() below in both branches.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to look up working trees for
        :return: a list of working trees; branches without one are
            skipped (with a warning if a tree was expected)
        """
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        """Report how many revisions, branches and trees were handled."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        # Commits skipped over on a restart are not counted as imported
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the number of revisions in the
            repository differs from the number of entries in the map
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count != known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Blobs without a mark are keyed by the sha of their data
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: unused; commit_handler passes None for automatic
            checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart, commits already in the id-map
        are only used for head tracking. Otherwise the commit is loaded
        and progress, termination and automatic checkpointing are handled.

        :raises BadRestart: if a commit being skipped is not in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint interval of 0 means no automatic checkpoints;
            # without the guard the modulo would raise ZeroDivisionError.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self):
        """Generate the file-id cache by searching repository inventories.
        """
        # Get the interesting revisions - the heads
        head_ids = self.cache_mgr.heads.keys()
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        """Output a progress note every self.progress_every commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                # A string ending in '--' is left out of the message
                # (presumably it means no estimate is available)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under 'refs/tags/' define a tag; any other reset
        is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name, decoded as utf-8 with undecodable
            bytes replaced
        :param from_: the import commit-id the tag points at; it is
            looked up in the commit-id -> revision-id map
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
449
450
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
451
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as self.verbose
        :param inventory_cache_size: size passed to the LRU cache
            holding inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no hints are available, when the
        data is the empty string or when the id is listed in the hints;
        otherwise it is stored as a use-once blob.

        :param id: the dataref to store the data under
        :param data: the blob content
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs are returned and kept; other blobs are removed
        from the cache as they are returned.

        :param id: the dataref the blob was stored under
        :raises KeyError: if the dataref is in neither cache
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
520
521
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
522
def _track_heads(cmd, cache_mgr):
523
    """Track the repository heads given a CommitCommand.
524
    
525
    :return: the list of parents in terms of commit-ids
526
    """
527
    # Get the true set of parents
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
528
    if cmd.parents:
529
        parents = cmd.parents
530
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
531
        last_id = cache_mgr.last_ids.get(cmd.ref)
532
        if last_id is not None:
533
            parents = [last_id]
534
        else:
535
            parents = []
536
    # Track the heads
537
    for parent in parents:
538
        try:
539
            del cache_mgr.heads[parent]
540
        except KeyError:
541
            # it's ok if the parent isn't there - another
542
            # commit may have already removed it
543
            pass
544
    cache_mgr.heads[cmd.id] = cmd.ref
545
    cache_mgr.last_ids[cmd.ref] = cmd.id
546
    cache_mgr.last_ref = cmd.ref
547
    return parents
548
549
0.64.5 by Ian Clatworthy
first cut at generic processing method
550
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that builds a revision and inventory per commit.

    pre_process_files() seeds an inventory from the first parent, the
    *_handler methods apply the file commands to that inventory and
    post_process_files() hands the finished revision to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command being processed
        :param repo: the repository inventories are looked up in
        :param cache_mgr: the cache manager shared between commits
        :param loader: the object whose load() method stores the revision
        :param verbose: if True, cache misses are reported via note()
        :param _experimental: experimental-features flag; only stored here
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parent revisions and
        sets up the inventory that the file commands will modify.
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Map the parent commit-ids onto revision-ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision.

        The inventory is cached under the new revision id and the
        revision is passed to the loader together with callbacks for
        fetching file lines and parent inventories.
        """
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # The author is only recorded when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        return re.sub(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)

    def modify_handler(self, filecmd):
        """Apply a file-modify command to the inventory.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command itself.
        """
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Apply a file-delete command to the inventory and the caches.

        Deleting a path that is not in the inventory only produces a
        warning.
        """
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except (KeyError, errors.NoSuchId):
            self.warning("ignoring delete of %s as not in inventory", path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def copy_handler(self, filecmd):
        """File-copy commands are not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Apply a file-rename command to the inventory and the caches.

        The directory that will contain the new path is created first if
        it does not exist yet.
        """
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Delete-all commands are not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path without a cached file-id is given a newly generated one,
        which is then remembered in the cache.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first; on a miss the inventory is
        read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids that are cached or in the repository
          inventories = one inventory per requested revision-id, in
            order; a revision that is not present gets the inventory of
            the repository's revision_tree(None)
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    inv = self.repo.revision_tree(revision_id).inventory
                    present.append(revision_id)
                    self.cache_mgr.inventories[revision_id] = inv
                else:
                    # The placeholder must not be cached: a cached
                    # inventory is taken to mean the revision is present,
                    # so caching it would make a later call report this
                    # missing revision as present.
                    inv = self.repo.revision_tree(None).inventory
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of entry to make for it
        :param is_executable: the executable flag, used for files only
        :param data: the content of a file or the target of a symlink
        :raises errors.BzrError: if the entry made for kind is neither
          a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Directories that are not known yet are created, parents first,
        and added to the inventory.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
812
813
0.64.34 by Ian Clatworthy
report lost branches
814
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
815
0.64.37 by Ian Clatworthy
create branches as required
816
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
817
        """Create an object responsible for updating branches.
818
819
        :param heads_by_ref: a dictionary where
820
          names are git-style references like refs/heads/master;
821
          values are one item lists of commits marks.
822
        """
0.64.37 by Ian Clatworthy
create branches as required
823
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
824
        self.branch = branch
825
        self.cache_mgr = cache_mgr
826
        self.heads_by_ref = heads_by_ref
827
        self.last_ref = last_ref
828
829
    def update(self):
        """Bring the Bazaar branches and tips in line with the heads.

        Branches are created as required when the repository is shared.
        When it isn't, the heads that could not be given a branch are
        returned so the loss of information can be reported.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        branch_tips, lost_heads = self._get_matching_branches()
        # Only branches that really changed are counted as updated
        updated = [br for br, tip in branch_tips
            if self._update_branch(br, tip)]
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
847
848
    def _get_matching_branches(self):
849
        """Get the Bazaar branches.
850
0.64.34 by Ian Clatworthy
report lost branches
851
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
852
          default_tip = the last commit mark for the default branch
853
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
854
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
855
            would have been created had the repository been shared and
856
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
857
        """
0.64.37 by Ian Clatworthy
create branches as required
858
        branch_tips = []
859
        lost_heads = []
860
        ref_names = self.heads_by_ref.keys()
861
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
862
            trunk = self.select_trunk(ref_names)
863
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
864
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
865
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
866
867
        # Convert the reference names into Bazaar speak
868
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
869
0.64.37 by Ian Clatworthy
create branches as required
870
        # Policy for locating branches
871
        def dir_under_current(name, ref_name):
872
            # Using the Bazaar name, get a directory under the current one
873
            return name
874
        def dir_sister_branch(name, ref_name):
875
            # Using the Bazaar name, get a sister directory to the branch
876
            return osutils.pathjoin(self.branch.base, "..", name)
877
        if self.branch is not None:
878
            dir_policy = dir_sister_branch
879
        else:
880
            dir_policy = dir_under_current
881
0.64.34 by Ian Clatworthy
report lost branches
882
        # Create/track missing branches
883
        shared_repo = self.repo.is_shared()
884
        for name in sorted(bzr_names.keys()):
885
            ref_name = bzr_names[name]
886
            tip = self.heads_by_ref[ref_name][0]
887
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
888
                location = dir_policy(name, ref_name)
889
                try:
890
                    br = self.make_branch(location)
891
                    branch_tips.append((br,tip))
892
                    continue
893
                except errors.BzrError, ex:
894
                    error("ERROR: failed to create branch %s: %s",
895
                        location, ex)
896
            lost_head = self.cache_mgr.revision_ids[tip]
897
            lost_info = (name, lost_head)
898
            lost_heads.append(lost_info)
899
        return branch_tips, lost_heads
900
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
901
    def select_trunk(self, ref_names):
        """Pick the reference to treat as the trunk.

        A preferred name wins if it is among ref_names; otherwise the
        last reference in the import stream is used.
        """
        preferred = ['refs/heads/master']
        matches = [ref for ref in preferred if ref in ref_names]
        if matches:
            return matches[0]
        return self.last_ref
908
0.64.37 by Ian Clatworthy
create branches as required
909
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch is, or is to be created
        :return: the branch opened at location if there is one,
          otherwise the newly created branch
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # No branch could be opened there, so create one
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
915
916
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
917
        """Generate Bazaar branch names from import ref names.
918
        
919
        :return: a dictionary with Bazaar names as keys and
920
          the original reference names as values.
921
        """
0.64.34 by Ian Clatworthy
report lost branches
922
        bazaar_names = {}
923
        for ref_name in sorted(ref_names):
924
            parts = ref_name.split('/')
925
            if parts[0] == 'refs':
926
                parts.pop(0)
927
            full_name = "--".join(parts)
928
            bazaar_name = parts[-1]
929
            if bazaar_name in bazaar_names:
930
                bazaar_name = full_name
931
            bazaar_names[bazaar_name] = ref_name
932
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
933
934
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
935
        """Update a branch with last revision and tag information.
936
        
937
        :return: whether the branch was changed or not
938
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
939
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
940
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
941
        existing_revno, existing_last_rev_id = br.last_revision_info()
942
        changed = False
943
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
944
            br.set_last_revision_info(revno, last_rev_id)
945
            changed = True
946
            note("\t branch %s now has %d revisions", br.nick, revno)
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
947
        # TODO: apply tags known in this branch
948
        #if self.tags:
949
        #    br.tags._set_tag_dict(self.tags)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
950
        return changed