/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
37
from bzrlib.trace import (
38
    note,
39
    warning,
0.64.37 by Ian Clatworthy
create branches as required
40
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
42
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
43
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
44
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
45
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
47
    processor,
48
    revisionloader,
49
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
50
51
0.64.41 by Ian Clatworthy
update multiple working trees if requested
52
# Number of commits between automatic progress reports
_DEFAULT_AUTO_PROGRESS = 1000

# Number of commits between automatic checkpoints
_DEFAULT_AUTO_CHECKPOINT = 10000

# Number of inventories held in the inventory cache
_DEFAULT_INV_CACHE_SIZE = 10
60
0.64.41 by Ian Clatworthy
update multiple working trees if requested
61
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
62
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 disables automatic
      checkpointing.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'experimental',
        ]

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Set up caches, the revision loader and the initial write group.

        Loads the parameters and the id-map, creates the cache manager and
        the revision loader, disables autopacking on pack repositories and
        opens the write group that the rest of the import writes into.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        if self._experimental:
            loader_factory = revisionloader.ExperimentalRevisionLoader
        else:
            loader_factory = revisionloader.ImportRevisionLoader
        self.loader = loader_factory(self.repo, self.inventory_cache_size)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            # Remember the original so post_process can tell that
            # autopacking was switched off.
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        inv_vf = self.repo.get_inventory_weave()
        inv_vf.enable_cache()

    def _load_info_and_params(self):
        """Read self.params (and the optional info file) into attributes.

        Sets _experimental, id_map_path, info, progress_every,
        checkpoint_every, inventory_cache_size, max_commits and
        total_commits.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps this an integer commit count.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error."""
        # if anything goes wrong, abort the write group if any.
        # The bare except is deliberate: the exception is always re-raised,
        # so this only guarantees the clean-up happens first.
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Finish the import.

        Commits the write group, saves the id-map, updates the branches,
        optionally updates the working trees, dumps statistics and tells
        the user what to do next.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound to the same name in both
                # branches: it is passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        # (These messages are explicitly not timestamped.)
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. If we checkpointed more than 10 times,
            # Bazaar would have auto-packed. For massive repositories,
            # this can take a *very* long time so we suggest it to the user
            # instead of doing it implicitly.
            if self._revision_count >= self.checkpoint_every * 10:
                note("To further optimize how data is stored, use 'bzr pack'.")
        if remind_about_update:
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to look up working trees for
        :return: a list of working trees; branches without one are skipped
        """
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def dump_stats(self):
        """Output a timestamped summary of what was imported and updated."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        # Commits skipped on a restart were counted but not imported
        rc = self._revision_count - self.skip_total
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def _init_id_map(self):
        """Load the id-map and check it matches the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the number of revisions in the
            repository differs from the number of entries in the map
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
            self.id_map_path)
        existing_count = len(self.repo.all_revision_ids())
        if existing_count != known:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known

    def _save_id_map(self):
        """Save the id-map."""
        # Save the whole lot every time. If this proves a problem, we can
        # change to 'append just the new ones' at a later time.
        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Marked blobs are keyed by their id, unmarked ones by their sha
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        Also called with cmd=None for automatic checkpoints; cmd is unused.
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self._save_id_map()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart, commits already present in the
        id-map are only used for head tracking. Otherwise the commit is
        loaded, progress is reported and an automatic checkpoint or the
        end of the import is triggered as required.

        :raises BadRestart: if a commit to be skipped is not in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint interval of 0 means "never checkpoint
            # automatically" rather than a division by zero.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def _gen_file_ids_cache(self):
        """Generate the file-id cache by searching repository inventories.
        """
        # Get the interesting revisions - the heads
        revision_ids = [self.cache_mgr.revision_ids[h]
            for h in self.cache_mgr.heads]

        # Update the fileid cache
        file_ids = {}
        for revision_id in revision_ids:
            inv = self.repo.revision_tree(revision_id).inventory
            # Cache the inventories while we're at it
            self.cache_mgr.inventories[revision_id] = inv
            for path, ie in inv.iter_entries():
                file_ids[path] = ie.file_id
        self.cache_mgr.file_ids = file_ids

    def report_progress(self, details=''):
        """Output a progress note every progress_every commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under 'refs/tags/' define a tag; any other reset
        is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
449
450
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
451
class GenericCacheManager(object):
    """Holds the caches shared by the GenericProcessor and its helpers."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as self.verbose
        :param inventory_cache_size: size passed to the inventory LRU cache
        """
        self.verbose = verbose

        # Blob stores, keyed by dataref (either :mark or the sha-1).
        # Entries in _blobs are dropped once fetched; sticky ones are kept.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory. Inventories are large, and most
        # parents are recent in history, so only a few are retained.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-id -> revision-id. Every entry is needed,
        # but each one is small.
        self.revision_ids = {}

        # path -> file-id, as generated
        self.file_ids = {}

        # Head tracking: the last ref seen, the last commit-id per ref
        # and a map of head commit-ids to their ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Decide which blobs are sticky - None means all of them
        sticky = None
        if info is not None:
            try:
                sticky = info['Blob usage tracking']['multi']
            except KeyError:
                # section or key absent - possible when no blobs used
                pass
        self._blobs_to_keep = sticky

    def store_blob(self, id, data):
        """Store a blob of data."""
        keep = self._blobs_to_keep
        if keep is None or data == '' or id in keep:
            target = self._sticky_blobs
        else:
            target = self._blobs
        target[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed when fetched.
        """
        if id in self._sticky_blobs:
            return self._sticky_blobs[id]
        return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # Deliberately a no-op: the file-id given to a path must be
        # remembered even after the file is deleted.
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # The old path keeps its entry: both paths map to the same
        # file-id and nothing is forgotten.
        file_id = self.file_ids[old_path]
        self.file_ids[new_path] = file_id
520
521
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
522
def _track_heads(cmd, cache_mgr):
523
    """Track the repository heads given a CommitCommand.
524
    
525
    :return: the list of parents in terms of commit-ids
526
    """
527
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
528
    if cmd.from_ is not None:
529
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
530
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
531
        last_id = cache_mgr.last_ids.get(cmd.ref)
532
        if last_id is not None:
533
            parents = [last_id]
534
        else:
535
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
536
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
537
    # Track the heads
538
    for parent in parents:
539
        try:
540
            del cache_mgr.heads[parent]
541
        except KeyError:
542
            # it's ok if the parent isn't there - another
543
            # commit may have already removed it
544
            pass
545
    cache_mgr.heads[cmd.id] = cmd.ref
546
    cache_mgr.last_ids[cmd.ref] = cmd.id
547
    cache_mgr.last_ref = cmd.ref
548
    return parents
549
550
0.64.5 by Ian Clatworthy
first cut at generic processing method
551
class GenericCommitHandler(processor.CommitHandler):
552
0.64.48 by Ian Clatworthy
one revision loader instance
553
    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command, handed to the base class
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the cache manager shared across commits
        :param loader: the object used to load the revision
        :param verbose: if True, report inventory cache misses
        :param _experimental: flag for enabling experimental stuff
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self._experimental = _experimental
        self.repo = repo
        self.loader = loader
        self.cache_mgr = cache_mgr
0.64.5 by Ian Clatworthy
first cut at generic processing method
561
0.64.43 by Ian Clatworthy
verbose mode cleanup
562
    def note(self, msg, *args):
        """Output a note, adding the id of the command as context."""
        with_context = "%s (%s)" % (msg, self.command.id)
        note(with_context, *args)
566
567
    def warning(self, msg, *args):
        """Output a warning, adding the id of the command as context."""
        with_context = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(with_context, *args)
571
0.64.5 by Ian Clatworthy
first cut at generic processing method
572
    def pre_process_files(self):
        """Prepare for committing.

        Picks the revision id, works out the parent revisions and seeds
        the inventory for this commit from the first parent (or from an
        initial inventory when there are no parents).
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list, then translate
        # those commit-ids into revision-ids
        commit_ids = _track_heads(self.command, self.cache_mgr)
        self.parents = [self.cache_mgr.revision_ids[commit_id]
            for commit_id in commit_ids]

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
604
0.64.14 by Ian Clatworthy
commit of modified files working
605
    def post_process_files(self):
        """Save the revision.

        The inventory built for this commit is cached under its revision
        id, then a Revision is assembled from the command and handed to
        the loader together with the inventory.
        """
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        properties = {}
        author = self.command.author
        if author is not None:
            # the author is only recorded when it differs from the committer
            author_id = "%s <%s>" % (author[0],author[1])
            if author_id != who:
                properties['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=properties,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
629
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
630
    def _escape_commit_message(self, message):
631
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
632
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
633
        # Code copied from bzrlib.commit.
634
        
635
        # Python strings can include characters that can't be
636
        # represented in well-formed XML; escape characters that
637
        # aren't listed in the XML specification
638
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
639
        message, _ = re.subn(
640
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
641
            lambda match: match.group(0).encode('unicode_escape'),
642
            message)
643
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
644
645
    def modify_handler(self, filecmd):
        """Apply a file modification to the inventory.

        The content is the data carried by the command itself unless the
        command refers to a blob, in which case that blob is fetched from
        the cache manager.
        """
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)
652
653
    def delete_handler(self, filecmd):
        """Remove a file from the inventory.

        If the inventory does not have the file, the delete is ignored
        and _warn_unless_in_merges decides whether to warn about it.
        """
        path = filecmd.path
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
666
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
667
    def _warn_unless_in_merges(self, fileid, path):
668
        if len(self.parents) <= 1:
669
            return
670
        for parent in self.parents[1:]:
671
            if fileid in self.get_inventory(parent):
672
                return
673
        self.warning("ignoring delete of %s as not in parent inventories", path)
674
0.64.5 by Ian Clatworthy
first cut at generic processing method
675
    def copy_handler(self, filecmd):
        """Copying files is not implemented by this handler."""
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
677
678
    def rename_handler(self, filecmd):
        """Rename a file in the inventory and in the path cache.

        The directory that is to contain the new path is created first
        when it does not exist yet.
        """
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        basename, new_parent_ie = self._ensure_directory(target)
        self.inventory.rename(file_id, new_parent_ie.file_id, basename)
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
686
687
    def deleteall_handler(self, filecmd):
        """Deleting everything is not implemented by this handler."""
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
689
0.64.16 by Ian Clatworthy
safe processing tweaks
690
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path seen before keeps the identifier cached for it; otherwise
        an identifier is generated and cached.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        file_ids = self.cache_mgr.file_ids
        try:
            known_id = file_ids[path]
        except KeyError:
            new_id = generate_ids.gen_file_id(path)
            file_ids[path] = new_id
            return new_id, True
        return known_id, False
702
0.64.5 by Ian Clatworthy
first cut at generic processing method
703
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        Same as bzr_file_id_and_new without the new flag.
        """
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
706
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
707
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision.

        The inventory is created with the revision id of this commit.
        """
        return inventory.Inventory(revision_id=self.revision_id)
711
0.64.5 by Ian Clatworthy
first cut at generic processing method
712
    def gen_revision_id(self):
        """Generate a revision id.

        The id is generated from the committer ("name <email>") and the
        commit timestamp of the command.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        return generate_ids.gen_revision_id(who, committer[2])
723
0.64.7 by Ian Clatworthy
start of multiple commit handling
724
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first. On a miss the inventory is
        taken from the revision tree of the repository and cached.
        """
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = inv
        return inv
735
0.64.5 by Ian Clatworthy
first cut at generic processing method
736
    def _get_inventories(self, revision_ids):
737
        """Get the inventories for revision-ids.
738
        
739
        This is a callback used by the RepositoryLoader to
740
        speed up inventory reconstruction."""
741
        present = []
742
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
743
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
744
        # successfully loaded into the repsoitory
745
        for revision_id in revision_ids:
746
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
747
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
748
                present.append(revision_id)
749
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
750
                if self.verbose:
751
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
752
                # Not cached so reconstruct from repository
753
                if self.repo.has_revision(revision_id):
754
                    rev_tree = self.repo.revision_tree(revision_id)
755
                    present.append(revision_id)
756
                else:
757
                    rev_tree = self.repo.revision_tree(None)
758
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
759
                self.cache_mgr.inventories[revision_id] = inv
760
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
761
        return present, inventories
762
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
763
    def _get_lines(self, file_id):
764
        """Get the lines for a file-id."""
765
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
766
767
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of inventory entry to make
        :param is_executable: the executable flag, used for files
        :param data: the text of a file or the target of a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum([len(line) for line in lines])
            # remembered so that _get_lines can supply the text
            self.lines_for_commit[file_id] = lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
            return
        # HACK: no API for this (del+add does more than it needs to)
        self.inventory._byid[file_id] = entry
        parent_ie.children[basename] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
793
794
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        Missing directories are added to the inventory, parents first.

        :return: basename, ie where basename is the last component of
          path and ie is the inventory entry of its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
823
824
0.64.34 by Ian Clatworthy
report lost branches
825
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
826
0.64.37 by Ian Clatworthy
create branches as required
827
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
828
        """Create an object responsible for updating branches.
829
830
        :param heads_by_ref: a dictionary where
831
          names are git-style references like refs/heads/master;
832
          values are one item lists of commits marks.
833
        """
0.64.37 by Ian Clatworthy
create branches as required
834
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
835
        self.branch = branch
836
        self.cache_mgr = cache_mgr
837
        self.heads_by_ref = heads_by_ref
838
        self.last_ref = last_ref
839
840
    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        branch_tips, lost_heads = self._get_matching_branches()
        # only the branches that really changed are reported
        updated = [br for br, tip in branch_tips
            if self._update_branch(br, tip)]
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
858
859
    def _get_matching_branches(self):
860
        """Get the Bazaar branches.
861
0.64.34 by Ian Clatworthy
report lost branches
862
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
863
          default_tip = the last commit mark for the default branch
864
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
865
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
866
            would have been created had the repository been shared and
867
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
868
        """
0.64.37 by Ian Clatworthy
create branches as required
869
        branch_tips = []
870
        lost_heads = []
871
        ref_names = self.heads_by_ref.keys()
872
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
873
            trunk = self.select_trunk(ref_names)
874
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
875
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
876
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
877
878
        # Convert the reference names into Bazaar speak
879
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
880
0.64.37 by Ian Clatworthy
create branches as required
881
        # Policy for locating branches
882
        def dir_under_current(name, ref_name):
883
            # Using the Bazaar name, get a directory under the current one
884
            return name
885
        def dir_sister_branch(name, ref_name):
886
            # Using the Bazaar name, get a sister directory to the branch
887
            return osutils.pathjoin(self.branch.base, "..", name)
888
        if self.branch is not None:
889
            dir_policy = dir_sister_branch
890
        else:
891
            dir_policy = dir_under_current
892
0.64.34 by Ian Clatworthy
report lost branches
893
        # Create/track missing branches
894
        shared_repo = self.repo.is_shared()
895
        for name in sorted(bzr_names.keys()):
896
            ref_name = bzr_names[name]
897
            tip = self.heads_by_ref[ref_name][0]
898
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
899
                location = dir_policy(name, ref_name)
900
                try:
901
                    br = self.make_branch(location)
902
                    branch_tips.append((br,tip))
903
                    continue
904
                except errors.BzrError, ex:
905
                    error("ERROR: failed to create branch %s: %s",
906
                        location, ex)
907
            lost_head = self.cache_mgr.revision_ids[tip]
908
            lost_info = (name, lost_head)
909
            lost_heads.append(lost_info)
910
        return branch_tips, lost_heads
911
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
912
    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        'refs/heads/master' is chosen whenever it is one of the names.
        Otherwise the last reference in the import stream is used.
        """
        if 'refs/heads/master' in ref_names:
            return 'refs/heads/master'
        return self.last_ref
919
0.64.37 by Ian Clatworthy
create branches as required
920
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch is, or is to be created
        :return: the branch opened at location or, when opening fails
          with NotBranchError, the result of creating one there
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # nothing there yet, so create it
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
926
927
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
928
        """Generate Bazaar branch names from import ref names.
929
        
930
        :return: a dictionary with Bazaar names as keys and
931
          the original reference names as values.
932
        """
0.64.34 by Ian Clatworthy
report lost branches
933
        bazaar_names = {}
934
        for ref_name in sorted(ref_names):
935
            parts = ref_name.split('/')
936
            if parts[0] == 'refs':
937
                parts.pop(0)
938
            full_name = "--".join(parts)
939
            bazaar_name = parts[-1]
940
            if bazaar_name in bazaar_names:
941
                bazaar_name = full_name
942
            bazaar_names[bazaar_name] = ref_name
943
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
944
945
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
946
        """Update a branch with last revision and tag information.
947
        
948
        :return: whether the branch was changed or not
949
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
950
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
951
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
952
        existing_revno, existing_last_rev_id = br.last_revision_info()
953
        changed = False
954
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
955
            br.set_last_revision_info(revno, last_rev_id)
956
            changed = True
957
            note("\t branch %s now has %d revisions", br.nick, revno)
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
958
        # TODO: apply tags known in this branch
959
        #if self.tags:
960
        #    br.tags._set_tag_dict(self.tags)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
961
        return changed